{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6508848779916296, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 10.0586, "active_queue_size": 16384.0, "cl_loss": 198.3139, "doc_norm": 8.4422, "encoder_q-embeddings": 34012.1836, "encoder_q-layer.0": 40080.4375, "encoder_q-layer.1": 31278.6934, "encoder_q-layer.10": 85206.0391, "encoder_q-layer.11": 56446.2188, "encoder_q-layer.2": 35660.2852, "encoder_q-layer.3": 37785.2695, "encoder_q-layer.4": 43934.3633, "encoder_q-layer.5": 48827.8711, "encoder_q-layer.6": 64152.1016, "encoder_q-layer.7": 75849.7656, "encoder_q-layer.8": 92344.8203, "encoder_q-layer.9": 71861.0234, "epoch": 0.0, "inbatch_neg_score": 39.1702, "inbatch_pos_score": 47.0625, "learning_rate": 5.000000000000001e-07, "loss": 198.3139, "norm_diff": 0.2693, "norm_loss": 0.0, "num_token_doc": 66.9186, "num_token_overlap": 11.6815, "num_token_query": 31.5023, "num_token_union": 65.2765, "num_word_context": 202.5435, "num_word_doc": 49.9168, "num_word_query": 23.4251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 80237.6406, "preclip_grad_norm_avg": 0.0007, "q@queue_neg_score": 39.1875, "query_norm": 8.1729, "queue_k_norm": 8.4213, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5023, "sent_len_1": 66.9186, "sent_len_max_0": 127.5187, "sent_len_max_1": 191.4038, "stdk": 0.1815, "stdq": 0.1973, "stdqueue_k": 0.1803, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 11.9141, "active_queue_size": 16384.0, "cl_loss": 126.7731, "doc_norm": 8.3619, "encoder_q-embeddings": 8859.335, "encoder_q-layer.0": 8840.1211, "encoder_q-layer.1": 10646.1582, "encoder_q-layer.10": 23064.5742, "encoder_q-layer.11": 23675.0859, "encoder_q-layer.2": 12845.8271, "encoder_q-layer.3": 12136.9785, "encoder_q-layer.4": 13058.8594, "encoder_q-layer.5": 13647.9414, "encoder_q-layer.6": 15704.0957, "encoder_q-layer.7": 17081.0645, "encoder_q-layer.8": 21012.6426, "encoder_q-layer.9": 16744.334, "epoch": 0.0, "inbatch_neg_score": 36.2075, "inbatch_pos_score": 40.7812, "learning_rate": 1.0000000000000002e-06, "loss": 126.7731, "norm_diff": 1.0126, "norm_loss": 0.0, "num_token_doc": 66.6737, "num_token_overlap": 11.6393, "num_token_query": 31.3649, "num_token_union": 65.0854, "num_word_context": 202.4328, "num_word_doc": 49.7477, "num_word_query": 23.2867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21989.7987, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 36.3125, "query_norm": 7.3493, "queue_k_norm": 8.353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3649, "sent_len_1": 66.6737, "sent_len_max_0": 127.4213, "sent_len_max_1": 189.4462, "stdk": 0.1788, "stdq": 0.1459, "stdqueue_k": 0.1777, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 11.5234, "active_queue_size": 16384.0, "cl_loss": 75.7738, "doc_norm": 8.2099, "encoder_q-embeddings": 4333.0991, "encoder_q-layer.0": 3796.3391, "encoder_q-layer.1": 4233.2632, "encoder_q-layer.10": 10519.7207, "encoder_q-layer.11": 16970.6094, "encoder_q-layer.2": 4614.438, "encoder_q-layer.3": 5140.3179, "encoder_q-layer.4": 5676.0815, "encoder_q-layer.5": 5672.5703, "encoder_q-layer.6": 7048.854, "encoder_q-layer.7": 7155.769, "encoder_q-layer.8": 7888.3301, "encoder_q-layer.9": 7178.9526, "epoch": 0.0, "inbatch_neg_score": 34.8619, "inbatch_pos_score": 37.6875, "learning_rate": 1.5e-06, "loss": 75.7738, "norm_diff": 1.1714, "norm_loss": 0.0, "num_token_doc": 66.8563, "num_token_overlap": 11.6963, "num_token_query": 31.3209, "num_token_union": 65.1289, "num_word_context": 202.4479, "num_word_doc": 49.8888, "num_word_query": 23.266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11801.3358, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 34.7812, "query_norm": 7.0385, "queue_k_norm": 8.2281, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3209, "sent_len_1": 66.8563, "sent_len_max_0": 127.5713, "sent_len_max_1": 187.9338, "stdk": 0.1718, "stdq": 0.1166, "stdqueue_k": 0.1735, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 12.3047, "active_queue_size": 16384.0, "cl_loss": 54.1969, "doc_norm": 8.0743, "encoder_q-embeddings": 3614.333, "encoder_q-layer.0": 3260.2188, "encoder_q-layer.1": 3537.0071, "encoder_q-layer.10": 6719.2041, "encoder_q-layer.11": 12807.5518, "encoder_q-layer.2": 3964.1372, "encoder_q-layer.3": 3790.8577, "encoder_q-layer.4": 3854.8684, "encoder_q-layer.5": 3744.8113, "encoder_q-layer.6": 4142.2153, "encoder_q-layer.7": 4054.1531, "encoder_q-layer.8": 4914.6006, "encoder_q-layer.9": 4219.4619, "epoch": 0.0, "inbatch_neg_score": 32.8551, "inbatch_pos_score": 35.0312, "learning_rate": 2.0000000000000003e-06, "loss": 54.1969, "norm_diff": 1.0951, "norm_loss": 0.0, "num_token_doc": 66.7738, "num_token_overlap": 11.7376, "num_token_query": 31.5599, "num_token_union": 65.2493, "num_word_context": 202.328, "num_word_doc": 49.8327, "num_word_query": 23.4458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8814.5836, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 32.8125, "query_norm": 6.9792, "queue_k_norm": 8.0736, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5599, "sent_len_1": 66.7738, "sent_len_max_0": 127.5438, "sent_len_max_1": 189.2325, "stdk": 0.1673, "stdq": 0.1033, "stdqueue_k": 0.1679, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 14.2578, "active_queue_size": 16384.0, "cl_loss": 41.8214, "doc_norm": 7.8789, "encoder_q-embeddings": 4769.9624, "encoder_q-layer.0": 4809.1523, "encoder_q-layer.1": 3964.0979, "encoder_q-layer.10": 5527.8242, "encoder_q-layer.11": 9796.7119, "encoder_q-layer.2": 3633.8623, "encoder_q-layer.3": 3572.8479, "encoder_q-layer.4": 3661.8726, "encoder_q-layer.5": 3591.7878, "encoder_q-layer.6": 3618.5288, "encoder_q-layer.7": 4019.6028, "encoder_q-layer.8": 4792.2295, "encoder_q-layer.9": 3532.927, "epoch": 0.0, "inbatch_neg_score": 30.8726, "inbatch_pos_score": 32.5938, "learning_rate": 2.5e-06, "loss": 41.8214, "norm_diff": 0.7405, "norm_loss": 0.0, "num_token_doc": 66.8822, "num_token_overlap": 11.6809, "num_token_query": 31.4022, "num_token_union": 65.1365, "num_word_context": 202.3013, "num_word_doc": 49.8447, "num_word_query": 23.3128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7621.6647, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 30.8125, "query_norm": 7.1384, "queue_k_norm": 7.8994, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4022, "sent_len_1": 66.8822, "sent_len_max_0": 127.3912, "sent_len_max_1": 192.955, "stdk": 0.1603, "stdq": 0.0993, "stdqueue_k": 0.1614, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 14.4531, "active_queue_size": 16384.0, "cl_loss": 33.6179, "doc_norm": 7.7201, "encoder_q-embeddings": 2756.447, "encoder_q-layer.0": 2508.3518, "encoder_q-layer.1": 2662.5742, "encoder_q-layer.10": 7359.9946, "encoder_q-layer.11": 10993.4375, "encoder_q-layer.2": 3020.8203, "encoder_q-layer.3": 3222.269, "encoder_q-layer.4": 3580.5227, "encoder_q-layer.5": 3722.4431, "encoder_q-layer.6": 3841.1497, "encoder_q-layer.7": 4148.3086, "encoder_q-layer.8": 5162.7031, "encoder_q-layer.9": 4265.1743, "epoch": 0.0, "inbatch_neg_score": 29.0446, "inbatch_pos_score": 30.5469, "learning_rate": 3e-06, "loss": 33.6179, "norm_diff": 0.7157, "norm_loss": 0.0, "num_token_doc": 66.8725, "num_token_overlap": 11.7097, "num_token_query": 31.4401, "num_token_union": 65.1889, "num_word_context": 202.2286, "num_word_doc": 49.8886, "num_word_query": 23.3439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7179.5633, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 29.0312, "query_norm": 7.0044, "queue_k_norm": 7.733, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4401, "sent_len_1": 66.8725, "sent_len_max_0": 127.575, "sent_len_max_1": 189.6525, "stdk": 0.1561, "stdq": 0.0927, "stdqueue_k": 0.1559, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 12.8906, "active_queue_size": 16384.0, "cl_loss": 28.8227, "doc_norm": 7.5575, "encoder_q-embeddings": 2887.3291, "encoder_q-layer.0": 2257.1006, "encoder_q-layer.1": 2759.0339, "encoder_q-layer.10": 5661.3335, "encoder_q-layer.11": 9325.6641, "encoder_q-layer.2": 3242.4255, "encoder_q-layer.3": 3678.2488, "encoder_q-layer.4": 4237.0977, "encoder_q-layer.5": 4555.1006, "encoder_q-layer.6": 4088.8213, "encoder_q-layer.7": 4208.9731, "encoder_q-layer.8": 4360.96, "encoder_q-layer.9": 3267.022, "epoch": 0.0, "inbatch_neg_score": 25.6906, "inbatch_pos_score": 26.8594, "learning_rate": 3.5000000000000004e-06, "loss": 28.8227, "norm_diff": 1.1204, "norm_loss": 0.0, "num_token_doc": 66.667, "num_token_overlap": 11.6108, "num_token_query": 31.3391, "num_token_union": 65.0434, "num_word_context": 201.9315, "num_word_doc": 49.708, "num_word_query": 23.2596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6565.7226, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 25.6719, "query_norm": 6.4371, "queue_k_norm": 7.566, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3391, "sent_len_1": 66.667, "sent_len_max_0": 127.5537, "sent_len_max_1": 188.74, "stdk": 0.1496, "stdq": 0.0858, "stdqueue_k": 0.1501, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 13.5742, "active_queue_size": 16384.0, "cl_loss": 24.1114, "doc_norm": 7.3825, "encoder_q-embeddings": 2957.7964, "encoder_q-layer.0": 2516.8484, "encoder_q-layer.1": 3365.7605, "encoder_q-layer.10": 6365.7671, "encoder_q-layer.11": 8538.6641, "encoder_q-layer.2": 3860.9487, "encoder_q-layer.3": 3948.1587, "encoder_q-layer.4": 4510.1665, "encoder_q-layer.5": 4788.9575, "encoder_q-layer.6": 4283.0791, "encoder_q-layer.7": 3996.7683, "encoder_q-layer.8": 4467.7227, "encoder_q-layer.9": 3162.7593, "epoch": 0.01, "inbatch_neg_score": 19.9979, "inbatch_pos_score": 21.0312, "learning_rate": 4.000000000000001e-06, "loss": 24.1114, "norm_diff": 1.931, "norm_loss": 0.0, "num_token_doc": 66.7596, "num_token_overlap": 11.6679, "num_token_query": 31.353, "num_token_union": 65.1184, "num_word_context": 202.1712, "num_word_doc": 49.824, "num_word_query": 23.291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6509.8027, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 19.9531, "query_norm": 5.4516, "queue_k_norm": 7.4032, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.353, "sent_len_1": 66.7596, "sent_len_max_0": 127.6437, "sent_len_max_1": 188.7887, "stdk": 0.1418, "stdq": 0.0813, "stdqueue_k": 0.1439, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 11.6211, "active_queue_size": 16384.0, "cl_loss": 20.1155, "doc_norm": 7.2418, "encoder_q-embeddings": 6574.3794, "encoder_q-layer.0": 5611.3291, "encoder_q-layer.1": 7011.0181, "encoder_q-layer.10": 4673.4204, "encoder_q-layer.11": 7960.1831, "encoder_q-layer.2": 7735.7461, "encoder_q-layer.3": 8516.6406, "encoder_q-layer.4": 9935.5283, "encoder_q-layer.5": 9052.4971, "encoder_q-layer.6": 6482.9692, "encoder_q-layer.7": 4514.1499, "encoder_q-layer.8": 4104.0176, "encoder_q-layer.9": 2254.5679, "epoch": 0.01, "inbatch_neg_score": 14.258, "inbatch_pos_score": 15.0156, "learning_rate": 4.5e-06, "loss": 20.1155, "norm_diff": 2.9086, "norm_loss": 0.0, "num_token_doc": 66.6505, "num_token_overlap": 11.6717, "num_token_query": 31.4237, "num_token_union": 65.0783, "num_word_context": 202.0839, "num_word_doc": 49.6772, "num_word_query": 23.3127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10073.0328, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 14.2344, "query_norm": 4.3333, "queue_k_norm": 7.2559, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4237, "sent_len_1": 66.6505, "sent_len_max_0": 127.5037, "sent_len_max_1": 190.1037, "stdk": 0.1376, "stdq": 0.0767, "stdqueue_k": 0.1385, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 11.6211, "active_queue_size": 16384.0, "cl_loss": 16.9555, "doc_norm": 7.1074, "encoder_q-embeddings": 9618.9111, "encoder_q-layer.0": 9774.9541, "encoder_q-layer.1": 11039.5928, "encoder_q-layer.10": 5118.8784, "encoder_q-layer.11": 7634.9888, "encoder_q-layer.2": 12360.0029, "encoder_q-layer.3": 12705.6885, "encoder_q-layer.4": 13261.5811, "encoder_q-layer.5": 13118.3682, "encoder_q-layer.6": 11998.0303, "encoder_q-layer.7": 8883.0654, "encoder_q-layer.8": 7069.79, "encoder_q-layer.9": 2486.7903, "epoch": 0.01, "inbatch_neg_score": 10.0622, "inbatch_pos_score": 10.6875, "learning_rate": 5e-06, "loss": 16.9555, "norm_diff": 3.6524, "norm_loss": 0.0, "num_token_doc": 66.8059, "num_token_overlap": 11.7042, "num_token_query": 31.4915, "num_token_union": 65.1917, "num_word_context": 202.5884, "num_word_doc": 49.8943, "num_word_query": 23.3893, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14956.4966, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 10.0312, "query_norm": 3.455, "queue_k_norm": 7.1229, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4915, "sent_len_1": 66.8059, "sent_len_max_0": 127.455, "sent_len_max_1": 190.5975, "stdk": 0.1313, "stdq": 0.0727, "stdqueue_k": 0.1328, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 12.5977, "active_queue_size": 16384.0, "cl_loss": 15.0465, "doc_norm": 6.9736, "encoder_q-embeddings": 6912.3091, "encoder_q-layer.0": 6191.103, "encoder_q-layer.1": 7586.7183, "encoder_q-layer.10": 5773.4053, "encoder_q-layer.11": 9603.9756, "encoder_q-layer.2": 8104.0503, "encoder_q-layer.3": 8779.417, "encoder_q-layer.4": 10098.2305, "encoder_q-layer.5": 11343.7285, "encoder_q-layer.6": 9552.1924, "encoder_q-layer.7": 7671.4814, "encoder_q-layer.8": 6800.894, "encoder_q-layer.9": 2603.8442, "epoch": 0.01, "inbatch_neg_score": 6.4596, "inbatch_pos_score": 7.0312, "learning_rate": 5.500000000000001e-06, "loss": 15.0465, "norm_diff": 4.1842, "norm_loss": 0.0, "num_token_doc": 66.9923, "num_token_overlap": 11.7004, "num_token_query": 31.365, "num_token_union": 65.2006, "num_word_context": 202.7004, "num_word_doc": 50.0119, "num_word_query": 23.2948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11707.8673, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 6.4453, "query_norm": 2.7894, "queue_k_norm": 6.9892, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.365, "sent_len_1": 66.9923, "sent_len_max_0": 127.5375, "sent_len_max_1": 190.5712, "stdk": 0.1251, "stdq": 0.0687, "stdqueue_k": 0.1264, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 14.1602, "active_queue_size": 16384.0, "cl_loss": 14.098, "doc_norm": 6.8658, "encoder_q-embeddings": 15925.2109, "encoder_q-layer.0": 14515.2812, "encoder_q-layer.1": 15888.1582, "encoder_q-layer.10": 3778.01, "encoder_q-layer.11": 6889.5073, "encoder_q-layer.2": 10072.7178, "encoder_q-layer.3": 9208.5645, "encoder_q-layer.4": 9903.5771, "encoder_q-layer.5": 9669.7607, "encoder_q-layer.6": 7723.7583, "encoder_q-layer.7": 5116.4263, "encoder_q-layer.8": 3876.594, "encoder_q-layer.9": 1744.7479, "epoch": 0.01, "inbatch_neg_score": 7.8411, "inbatch_pos_score": 8.3906, "learning_rate": 6e-06, "loss": 14.098, "norm_diff": 4.1211, "norm_loss": 0.0, "num_token_doc": 66.6279, "num_token_overlap": 11.6404, "num_token_query": 31.3914, "num_token_union": 65.1162, "num_word_context": 202.2885, "num_word_doc": 49.7698, "num_word_query": 23.317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15164.1888, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 7.8359, "query_norm": 2.7448, "queue_k_norm": 6.8837, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3914, "sent_len_1": 66.6279, "sent_len_max_0": 127.5738, "sent_len_max_1": 187.81, "stdk": 0.1207, "stdq": 0.0671, "stdqueue_k": 0.1218, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 12.1094, "active_queue_size": 16384.0, "cl_loss": 13.0063, "doc_norm": 6.7712, "encoder_q-embeddings": 6040.6943, "encoder_q-layer.0": 5166.5527, "encoder_q-layer.1": 5871.3901, "encoder_q-layer.10": 3152.9983, "encoder_q-layer.11": 5990.7876, "encoder_q-layer.2": 6343.0098, "encoder_q-layer.3": 6261.3091, "encoder_q-layer.4": 6241.4346, "encoder_q-layer.5": 6315.291, "encoder_q-layer.6": 5247.5366, "encoder_q-layer.7": 4027.9065, "encoder_q-layer.8": 3576.0447, "encoder_q-layer.9": 1665.1992, "epoch": 0.01, "inbatch_neg_score": 2.2661, "inbatch_pos_score": 2.7305, "learning_rate": 6.5000000000000004e-06, "loss": 13.0063, "norm_diff": 4.5847, "norm_loss": 0.0, "num_token_doc": 66.8471, "num_token_overlap": 11.7033, "num_token_query": 31.4319, "num_token_union": 65.1804, "num_word_context": 202.1953, "num_word_doc": 49.8707, "num_word_query": 23.3388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7815.496, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.2598, "query_norm": 2.1865, "queue_k_norm": 6.7827, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4319, "sent_len_1": 66.8471, "sent_len_max_0": 127.6213, "sent_len_max_1": 189.2212, "stdk": 0.1159, "stdq": 0.061, "stdqueue_k": 0.1167, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 13.4766, "active_queue_size": 16384.0, "cl_loss": 12.2933, "doc_norm": 6.6791, "encoder_q-embeddings": 4609.3218, "encoder_q-layer.0": 4064.4663, "encoder_q-layer.1": 4171.2109, "encoder_q-layer.10": 4088.2432, "encoder_q-layer.11": 7548.1431, "encoder_q-layer.2": 4736.585, "encoder_q-layer.3": 5165.8882, "encoder_q-layer.4": 5842.2705, "encoder_q-layer.5": 6374.0137, "encoder_q-layer.6": 5009.6006, "encoder_q-layer.7": 4106.999, "encoder_q-layer.8": 4149.314, "encoder_q-layer.9": 2076.8955, "epoch": 0.01, "inbatch_neg_score": 4.2731, "inbatch_pos_score": 4.793, "learning_rate": 7.000000000000001e-06, "loss": 12.2933, "norm_diff": 4.442, "norm_loss": 0.0, "num_token_doc": 66.9413, "num_token_overlap": 11.5875, "num_token_query": 31.2163, "num_token_union": 65.2056, "num_word_context": 202.4863, "num_word_doc": 49.964, "num_word_query": 23.1824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7035.8232, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 4.2695, "query_norm": 2.2371, "queue_k_norm": 6.6928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2163, "sent_len_1": 66.9413, "sent_len_max_0": 127.4062, "sent_len_max_1": 190.205, "stdk": 0.1111, "stdq": 0.0617, "stdqueue_k": 0.1121, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 14.8438, "active_queue_size": 16384.0, "cl_loss": 11.8028, "doc_norm": 6.6085, "encoder_q-embeddings": 2290.4412, "encoder_q-layer.0": 2035.0564, "encoder_q-layer.1": 2361.6206, "encoder_q-layer.10": 2848.7747, "encoder_q-layer.11": 5539.4087, "encoder_q-layer.2": 2589.916, "encoder_q-layer.3": 2661.7073, "encoder_q-layer.4": 2828.3284, "encoder_q-layer.5": 3075.1489, "encoder_q-layer.6": 2430.647, "encoder_q-layer.7": 1846.0645, "encoder_q-layer.8": 1965.7549, "encoder_q-layer.9": 1432.2268, "epoch": 0.01, "inbatch_neg_score": 2.9996, "inbatch_pos_score": 3.4766, "learning_rate": 7.5e-06, "loss": 11.8028, "norm_diff": 4.4931, "norm_loss": 0.0, "num_token_doc": 66.6852, "num_token_overlap": 11.6704, "num_token_query": 31.3481, "num_token_union": 65.0023, "num_word_context": 202.0384, "num_word_doc": 49.7775, "num_word_query": 23.2844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3976.9279, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.9961, "query_norm": 2.1153, "queue_k_norm": 6.6061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3481, "sent_len_1": 66.6852, "sent_len_max_0": 127.575, "sent_len_max_1": 190.5462, "stdk": 0.1073, "stdq": 0.058, "stdqueue_k": 0.1074, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 12.4023, "active_queue_size": 16384.0, "cl_loss": 11.495, "doc_norm": 6.5185, "encoder_q-embeddings": 5223.3813, "encoder_q-layer.0": 4307.4033, "encoder_q-layer.1": 4941.8442, "encoder_q-layer.10": 6773.2617, "encoder_q-layer.11": 10520.1592, "encoder_q-layer.2": 5480.3706, "encoder_q-layer.3": 5833.3848, "encoder_q-layer.4": 6758.1802, "encoder_q-layer.5": 8219.6875, "encoder_q-layer.6": 7298.4834, "encoder_q-layer.7": 6931.874, "encoder_q-layer.8": 7390.6401, "encoder_q-layer.9": 3537.4822, "epoch": 0.01, "inbatch_neg_score": 3.5586, "inbatch_pos_score": 4.0391, "learning_rate": 8.000000000000001e-06, "loss": 11.495, "norm_diff": 4.3233, "norm_loss": 0.0, "num_token_doc": 67.0498, "num_token_overlap": 11.7277, "num_token_query": 31.4224, "num_token_union": 65.3042, "num_word_context": 202.792, "num_word_doc": 50.0518, "num_word_query": 23.329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9259.2419, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.5371, "query_norm": 2.1952, "queue_k_norm": 6.5181, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4224, "sent_len_1": 67.0498, "sent_len_max_0": 127.3713, "sent_len_max_1": 187.54, "stdk": 0.1017, "stdq": 0.0601, "stdqueue_k": 0.1027, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 13.6719, "active_queue_size": 16384.0, "cl_loss": 11.4021, "doc_norm": 6.4154, "encoder_q-embeddings": 8081.0015, "encoder_q-layer.0": 7115.915, "encoder_q-layer.1": 7995.9404, "encoder_q-layer.10": 2866.498, "encoder_q-layer.11": 5414.8359, "encoder_q-layer.2": 8378.0088, "encoder_q-layer.3": 8810.8691, "encoder_q-layer.4": 9044.1064, "encoder_q-layer.5": 9270.0098, "encoder_q-layer.6": 9751.2441, "encoder_q-layer.7": 10067.042, "encoder_q-layer.8": 12273.9082, "encoder_q-layer.9": 2159.1111, "epoch": 0.01, "inbatch_neg_score": 3.3422, "inbatch_pos_score": 3.7988, "learning_rate": 8.500000000000002e-06, "loss": 11.4021, "norm_diff": 4.1671, "norm_loss": 0.0, "num_token_doc": 66.7696, "num_token_overlap": 11.6422, "num_token_query": 31.2732, "num_token_union": 65.1205, "num_word_context": 202.3565, "num_word_doc": 49.8475, "num_word_query": 23.2252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12187.5915, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.3301, "query_norm": 2.2484, "queue_k_norm": 6.4316, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2732, "sent_len_1": 66.7696, "sent_len_max_0": 127.4675, "sent_len_max_1": 186.3338, "stdk": 0.0968, "stdq": 0.0605, "stdqueue_k": 0.0983, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 12.6953, "active_queue_size": 16384.0, "cl_loss": 11.463, "doc_norm": 6.3315, "encoder_q-embeddings": 7666.4707, "encoder_q-layer.0": 6903.7715, "encoder_q-layer.1": 7434.9536, "encoder_q-layer.10": 8906.709, "encoder_q-layer.11": 11342.5693, "encoder_q-layer.2": 8443.9395, "encoder_q-layer.3": 8931.9326, "encoder_q-layer.4": 10064.7695, "encoder_q-layer.5": 9626.4668, "encoder_q-layer.6": 8456.6904, "encoder_q-layer.7": 6597.7324, "encoder_q-layer.8": 6269.7671, "encoder_q-layer.9": 4041.4998, "epoch": 0.01, "inbatch_neg_score": 1.726, "inbatch_pos_score": 2.1836, "learning_rate": 9e-06, "loss": 11.463, "norm_diff": 4.0364, "norm_loss": 0.0, "num_token_doc": 66.6733, "num_token_overlap": 11.6493, "num_token_query": 31.3056, "num_token_union": 65.0682, "num_word_context": 202.2621, "num_word_doc": 49.7758, "num_word_query": 23.2468, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11684.7661, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.7266, "query_norm": 2.2951, "queue_k_norm": 6.3383, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3056, "sent_len_1": 66.6733, "sent_len_max_0": 127.4912, "sent_len_max_1": 188.375, "stdk": 0.0942, "stdq": 0.0641, "stdqueue_k": 0.094, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 12.1094, "active_queue_size": 16384.0, "cl_loss": 11.0283, "doc_norm": 6.2045, "encoder_q-embeddings": 5853.5762, "encoder_q-layer.0": 5453.2153, "encoder_q-layer.1": 6052.6738, "encoder_q-layer.10": 2024.2556, "encoder_q-layer.11": 4231.2583, "encoder_q-layer.2": 5933.1704, "encoder_q-layer.3": 5894.1865, "encoder_q-layer.4": 6529.9512, "encoder_q-layer.5": 8289.415, "encoder_q-layer.6": 7693.6646, "encoder_q-layer.7": 7192.1719, "encoder_q-layer.8": 5899.1973, "encoder_q-layer.9": 1461.5854, "epoch": 0.01, "inbatch_neg_score": 1.9024, "inbatch_pos_score": 2.332, "learning_rate": 9.5e-06, "loss": 11.0283, "norm_diff": 3.9699, "norm_loss": 0.0, "num_token_doc": 66.7991, "num_token_overlap": 11.6514, "num_token_query": 31.2733, "num_token_union": 65.0729, "num_word_context": 202.2678, "num_word_doc": 49.8522, "num_word_query": 23.2132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8616.5822, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.9004, "query_norm": 2.2346, "queue_k_norm": 6.2296, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2733, "sent_len_1": 66.7991, "sent_len_max_0": 127.41, "sent_len_max_1": 190.8825, "stdk": 0.0895, "stdq": 0.0631, "stdqueue_k": 0.09, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 13.3789, "active_queue_size": 16384.0, "cl_loss": 10.889, "doc_norm": 6.0824, "encoder_q-embeddings": 3722.3198, "encoder_q-layer.0": 3213.9265, "encoder_q-layer.1": 3914.1709, "encoder_q-layer.10": 3011.272, "encoder_q-layer.11": 5062.2656, "encoder_q-layer.2": 4520.8184, "encoder_q-layer.3": 4607.0957, "encoder_q-layer.4": 4926.4663, "encoder_q-layer.5": 5775.9048, "encoder_q-layer.6": 4515.6831, "encoder_q-layer.7": 3597.7209, "encoder_q-layer.8": 3272.5388, "encoder_q-layer.9": 1838.2456, "epoch": 0.01, "inbatch_neg_score": 2.9077, "inbatch_pos_score": 3.332, "learning_rate": 1e-05, "loss": 10.889, "norm_diff": 3.6389, "norm_loss": 0.0, "num_token_doc": 66.5467, "num_token_overlap": 11.616, "num_token_query": 31.1572, "num_token_union": 64.9227, "num_word_context": 201.7009, "num_word_doc": 49.6497, "num_word_query": 23.1213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5881.3416, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.8887, "query_norm": 2.4436, "queue_k_norm": 6.1037, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1572, "sent_len_1": 66.5467, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.8063, "stdk": 0.0857, "stdq": 0.0673, "stdqueue_k": 0.0866, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 14.1602, "active_queue_size": 16384.0, "cl_loss": 10.6873, "doc_norm": 5.9146, "encoder_q-embeddings": 8080.3857, "encoder_q-layer.0": 6996.417, "encoder_q-layer.1": 7792.6016, "encoder_q-layer.10": 4679.6763, "encoder_q-layer.11": 8582.3408, "encoder_q-layer.2": 8240.6191, "encoder_q-layer.3": 7407.2905, "encoder_q-layer.4": 7152.082, "encoder_q-layer.5": 6208.1323, "encoder_q-layer.6": 5512.0815, "encoder_q-layer.7": 4735.9878, "encoder_q-layer.8": 5136.1914, "encoder_q-layer.9": 3065.3096, "epoch": 0.01, "inbatch_neg_score": 1.5838, "inbatch_pos_score": 2.0234, "learning_rate": 1.05e-05, "loss": 10.6873, "norm_diff": 3.4861, "norm_loss": 0.0, "num_token_doc": 67.0371, "num_token_overlap": 11.6876, "num_token_query": 31.4324, "num_token_union": 65.2613, "num_word_context": 202.3693, "num_word_doc": 50.0043, "num_word_query": 23.3462, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9798.5689, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5723, "query_norm": 2.4285, "queue_k_norm": 5.9494, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4324, "sent_len_1": 67.0371, "sent_len_max_0": 127.4625, "sent_len_max_1": 191.0637, "stdk": 0.0829, "stdq": 0.0691, "stdqueue_k": 0.0832, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 17.2852, "active_queue_size": 16384.0, "cl_loss": 10.2651, "doc_norm": 5.77, "encoder_q-embeddings": 25561.9453, "encoder_q-layer.0": 24769.6816, "encoder_q-layer.1": 24417.748, "encoder_q-layer.10": 15422.4551, "encoder_q-layer.11": 16046.6328, "encoder_q-layer.2": 25776.8438, "encoder_q-layer.3": 24266.0234, "encoder_q-layer.4": 23363.3008, "encoder_q-layer.5": 21093.3203, "encoder_q-layer.6": 18365.0059, "encoder_q-layer.7": 18955.4121, "encoder_q-layer.8": 20022.5684, "encoder_q-layer.9": 12697.6816, "epoch": 0.01, "inbatch_neg_score": 1.0422, "inbatch_pos_score": 1.4824, "learning_rate": 1.1000000000000001e-05, "loss": 10.2651, "norm_diff": 3.3808, "norm_loss": 0.0, "num_token_doc": 66.6717, "num_token_overlap": 11.6704, "num_token_query": 31.3175, "num_token_union": 65.0409, "num_word_context": 202.3853, "num_word_doc": 49.786, "num_word_query": 23.2389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31559.8904, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 1.0293, "query_norm": 2.3892, "queue_k_norm": 5.7718, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3175, "sent_len_1": 66.6717, "sent_len_max_0": 127.5863, "sent_len_max_1": 186.6087, "stdk": 0.0795, "stdq": 0.07, "stdqueue_k": 0.0801, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 18.2617, "active_queue_size": 16384.0, "cl_loss": 10.0125, "doc_norm": 5.546, "encoder_q-embeddings": 6558.1353, "encoder_q-layer.0": 5947.6689, "encoder_q-layer.1": 6254.9966, "encoder_q-layer.10": 14774.6074, "encoder_q-layer.11": 14835.9375, "encoder_q-layer.2": 7096.1543, "encoder_q-layer.3": 7136.0796, "encoder_q-layer.4": 8210.334, "encoder_q-layer.5": 10613.1572, "encoder_q-layer.6": 11729.4766, "encoder_q-layer.7": 12391.0137, "encoder_q-layer.8": 12277.7324, "encoder_q-layer.9": 9651.8271, "epoch": 0.01, "inbatch_neg_score": 1.1557, "inbatch_pos_score": 1.5742, "learning_rate": 1.1500000000000002e-05, "loss": 10.0125, "norm_diff": 3.1155, "norm_loss": 0.0, "num_token_doc": 66.7606, "num_token_overlap": 11.6722, "num_token_query": 31.4525, "num_token_union": 65.1844, "num_word_context": 202.145, "num_word_doc": 49.8435, "num_word_query": 23.3625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13805.7221, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1436, "query_norm": 2.4306, "queue_k_norm": 5.574, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4525, "sent_len_1": 66.7606, "sent_len_max_0": 127.4437, "sent_len_max_1": 189.4663, "stdk": 0.0768, "stdq": 0.0729, "stdqueue_k": 0.0772, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 14.8438, "active_queue_size": 16384.0, "cl_loss": 9.6541, "doc_norm": 5.2921, "encoder_q-embeddings": 9795.1338, "encoder_q-layer.0": 8470.4893, "encoder_q-layer.1": 8914.0498, "encoder_q-layer.10": 14025.6611, "encoder_q-layer.11": 14249.5713, "encoder_q-layer.2": 9717.9014, "encoder_q-layer.3": 9048.6484, "encoder_q-layer.4": 8059.6387, "encoder_q-layer.5": 7406.937, "encoder_q-layer.6": 6323.2461, "encoder_q-layer.7": 6024.5869, "encoder_q-layer.8": 7310.7979, "encoder_q-layer.9": 8469.9277, "epoch": 0.02, "inbatch_neg_score": 1.4171, "inbatch_pos_score": 1.8486, "learning_rate": 1.2e-05, "loss": 9.6541, "norm_diff": 2.7546, "norm_loss": 0.0, "num_token_doc": 66.95, "num_token_overlap": 11.7264, "num_token_query": 31.3982, "num_token_union": 65.2078, "num_word_context": 202.4092, "num_word_doc": 49.9112, "num_word_query": 23.3421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13121.1818, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4102, "query_norm": 2.5375, "queue_k_norm": 5.3633, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3982, "sent_len_1": 66.95, "sent_len_max_0": 127.6112, "sent_len_max_1": 191.4525, "stdk": 0.0745, "stdq": 0.0747, "stdqueue_k": 0.0746, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 16.7969, "active_queue_size": 16384.0, "cl_loss": 9.3836, "doc_norm": 5.1186, "encoder_q-embeddings": 3013.9805, "encoder_q-layer.0": 2540.9946, "encoder_q-layer.1": 3290.3469, "encoder_q-layer.10": 24197.9141, "encoder_q-layer.11": 21447.7285, "encoder_q-layer.2": 4152.2129, "encoder_q-layer.3": 5142.2344, "encoder_q-layer.4": 7449.6694, "encoder_q-layer.5": 9997.8535, "encoder_q-layer.6": 13840.7002, "encoder_q-layer.7": 16776.5293, "encoder_q-layer.8": 17535.459, "encoder_q-layer.9": 17709.3262, "epoch": 0.02, "inbatch_neg_score": 1.6748, "inbatch_pos_score": 2.1035, "learning_rate": 1.25e-05, "loss": 9.3836, "norm_diff": 2.4667, "norm_loss": 0.0, "num_token_doc": 66.7939, "num_token_overlap": 11.666, "num_token_query": 31.4492, "num_token_union": 65.2122, "num_word_context": 202.5033, "num_word_doc": 49.864, "num_word_query": 23.3586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17404.6194, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.6631, "query_norm": 2.6519, "queue_k_norm": 5.1373, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4492, "sent_len_1": 66.7939, "sent_len_max_0": 127.645, "sent_len_max_1": 187.95, "stdk": 0.0721, "stdq": 0.0789, "stdqueue_k": 0.0724, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 16.6992, "active_queue_size": 16384.0, "cl_loss": 9.2022, "doc_norm": 4.8645, "encoder_q-embeddings": 3025.2402, "encoder_q-layer.0": 2549.5986, "encoder_q-layer.1": 2557.5322, "encoder_q-layer.10": 10304.8818, "encoder_q-layer.11": 9771.2217, "encoder_q-layer.2": 2694.5247, "encoder_q-layer.3": 2550.0444, "encoder_q-layer.4": 2652.2598, "encoder_q-layer.5": 2822.8501, "encoder_q-layer.6": 3473.8115, "encoder_q-layer.7": 4231.7949, "encoder_q-layer.8": 5666.0449, "encoder_q-layer.9": 6872.2759, "epoch": 0.02, "inbatch_neg_score": 0.9503, "inbatch_pos_score": 1.3848, "learning_rate": 1.3000000000000001e-05, "loss": 9.2022, "norm_diff": 2.3281, "norm_loss": 0.0, "num_token_doc": 66.8531, "num_token_overlap": 11.6743, "num_token_query": 31.3256, "num_token_union": 65.1043, "num_word_context": 202.238, "num_word_doc": 49.8529, "num_word_query": 23.2443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6788.1618, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.9307, "query_norm": 2.5364, "queue_k_norm": 4.9048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3256, "sent_len_1": 66.8531, "sent_len_max_0": 127.5413, "sent_len_max_1": 191.6525, "stdk": 0.07, "stdq": 0.0743, "stdqueue_k": 0.0705, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 18.1641, "active_queue_size": 16384.0, "cl_loss": 8.9095, "doc_norm": 4.6471, "encoder_q-embeddings": 3222.2754, "encoder_q-layer.0": 2780.2417, "encoder_q-layer.1": 3332.0457, "encoder_q-layer.10": 28762.6016, "encoder_q-layer.11": 25237.5371, "encoder_q-layer.2": 4221.5024, "encoder_q-layer.3": 5065.9541, "encoder_q-layer.4": 6941.1372, "encoder_q-layer.5": 9183.085, "encoder_q-layer.6": 13000.623, "encoder_q-layer.7": 16216.8545, "encoder_q-layer.8": 17499.3809, "encoder_q-layer.9": 20469.459, "epoch": 0.02, "inbatch_neg_score": 0.9682, "inbatch_pos_score": 1.4102, "learning_rate": 1.3500000000000001e-05, "loss": 8.9095, "norm_diff": 2.26, "norm_loss": 0.0, "num_token_doc": 66.7442, "num_token_overlap": 11.6537, "num_token_query": 31.3249, "num_token_union": 65.095, "num_word_context": 202.0157, "num_word_doc": 49.7786, "num_word_query": 23.2538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18850.6481, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.9609, "query_norm": 2.3871, "queue_k_norm": 4.6844, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3249, "sent_len_1": 66.7442, "sent_len_max_0": 127.285, "sent_len_max_1": 189.7375, "stdk": 0.0682, "stdq": 0.0702, "stdqueue_k": 0.0686, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 17.2852, "active_queue_size": 16384.0, "cl_loss": 9.0641, "doc_norm": 4.4432, "encoder_q-embeddings": 3217.0227, "encoder_q-layer.0": 2751.7874, "encoder_q-layer.1": 3870.6404, "encoder_q-layer.10": 40270.8711, "encoder_q-layer.11": 32379.6035, "encoder_q-layer.2": 5436.3662, "encoder_q-layer.3": 7042.856, "encoder_q-layer.4": 10686.8965, "encoder_q-layer.5": 14756.6191, "encoder_q-layer.6": 21271.6973, "encoder_q-layer.7": 25747.2266, "encoder_q-layer.8": 28323.5566, "encoder_q-layer.9": 31365.918, "epoch": 0.02, "inbatch_neg_score": 1.108, "inbatch_pos_score": 1.5371, "learning_rate": 1.4000000000000001e-05, "loss": 9.0641, "norm_diff": 1.8923, "norm_loss": 0.0, "num_token_doc": 66.9661, "num_token_overlap": 11.728, "num_token_query": 31.4559, "num_token_union": 65.22, "num_word_context": 202.1758, "num_word_doc": 49.9508, "num_word_query": 23.3622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27596.7148, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 1.0957, "query_norm": 2.5509, "queue_k_norm": 4.4833, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4559, "sent_len_1": 66.9661, "sent_len_max_0": 127.6038, "sent_len_max_1": 192.5037, "stdk": 0.0666, "stdq": 0.0745, "stdqueue_k": 0.067, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 16.5039, "active_queue_size": 16384.0, "cl_loss": 9.2286, "doc_norm": 4.242, "encoder_q-embeddings": 3054.4961, "encoder_q-layer.0": 2681.6094, "encoder_q-layer.1": 3826.3223, "encoder_q-layer.10": 44936.5117, "encoder_q-layer.11": 36677.5938, "encoder_q-layer.2": 5399.0303, "encoder_q-layer.3": 6880.6802, "encoder_q-layer.4": 10017.6201, "encoder_q-layer.5": 14136.9902, "encoder_q-layer.6": 19838.6484, "encoder_q-layer.7": 24599.8398, "encoder_q-layer.8": 27855.2598, "encoder_q-layer.9": 34278.4766, "epoch": 0.02, "inbatch_neg_score": 1.0301, "inbatch_pos_score": 1.4717, "learning_rate": 1.45e-05, "loss": 9.2286, "norm_diff": 1.7222, "norm_loss": 0.0, "num_token_doc": 66.87, "num_token_overlap": 11.6782, "num_token_query": 31.4663, "num_token_union": 65.2353, "num_word_context": 202.4239, "num_word_doc": 49.8984, "num_word_query": 23.3849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28981.7351, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 1.0264, "query_norm": 2.5199, "queue_k_norm": 4.3015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4663, "sent_len_1": 66.87, "sent_len_max_0": 127.51, "sent_len_max_1": 189.2512, "stdk": 0.0657, "stdq": 0.0729, "stdqueue_k": 0.0659, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 16.9922, "active_queue_size": 16384.0, "cl_loss": 8.9021, "doc_norm": 4.0895, "encoder_q-embeddings": 4218.7075, "encoder_q-layer.0": 3820.0278, "encoder_q-layer.1": 4592.4194, "encoder_q-layer.10": 25750.3867, "encoder_q-layer.11": 18969.4746, "encoder_q-layer.2": 5354.8457, "encoder_q-layer.3": 5506.606, "encoder_q-layer.4": 6605.3022, "encoder_q-layer.5": 8489.0254, "encoder_q-layer.6": 11622.2549, "encoder_q-layer.7": 13999.8457, "encoder_q-layer.8": 15973.7002, "encoder_q-layer.9": 19568.8418, "epoch": 0.02, "inbatch_neg_score": 0.5138, "inbatch_pos_score": 0.9268, "learning_rate": 1.5e-05, "loss": 8.9021, "norm_diff": 1.5715, "norm_loss": 0.0, "num_token_doc": 67.1236, "num_token_overlap": 11.6924, "num_token_query": 31.3316, "num_token_union": 65.2954, "num_word_context": 202.2803, "num_word_doc": 50.0767, "num_word_query": 23.2698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16884.7234, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5015, "query_norm": 2.518, "queue_k_norm": 4.128, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3316, "sent_len_1": 67.1236, "sent_len_max_0": 127.3125, "sent_len_max_1": 188.8537, "stdk": 0.0644, "stdq": 0.0731, "stdqueue_k": 0.0649, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 14.9414, "active_queue_size": 16384.0, "cl_loss": 8.6346, "doc_norm": 3.9828, "encoder_q-embeddings": 3576.123, "encoder_q-layer.0": 3048.7327, "encoder_q-layer.1": 4138.1152, "encoder_q-layer.10": 50770.8906, "encoder_q-layer.11": 38120.8047, "encoder_q-layer.2": 5570.4922, "encoder_q-layer.3": 6759.4312, "encoder_q-layer.4": 10082.5127, "encoder_q-layer.5": 13732.0, "encoder_q-layer.6": 19858.1914, "encoder_q-layer.7": 24818.2383, "encoder_q-layer.8": 29666.7637, "encoder_q-layer.9": 37421.5039, "epoch": 0.02, "inbatch_neg_score": 0.951, "inbatch_pos_score": 1.3809, "learning_rate": 1.55e-05, "loss": 8.6346, "norm_diff": 1.5293, "norm_loss": 0.0, "num_token_doc": 66.6015, "num_token_overlap": 11.6117, "num_token_query": 31.197, "num_token_union": 64.9869, "num_word_context": 202.1856, "num_word_doc": 49.7033, "num_word_query": 23.166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30980.7268, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.9443, "query_norm": 2.4535, "queue_k_norm": 3.974, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.197, "sent_len_1": 66.6015, "sent_len_max_0": 127.5575, "sent_len_max_1": 188.2663, "stdk": 0.0643, "stdq": 0.0712, "stdqueue_k": 0.064, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 20.5078, "active_queue_size": 16384.0, "cl_loss": 8.7258, "doc_norm": 3.8095, "encoder_q-embeddings": 3007.2915, "encoder_q-layer.0": 2701.3406, "encoder_q-layer.1": 2984.1052, "encoder_q-layer.10": 7870.4922, "encoder_q-layer.11": 7322.5806, "encoder_q-layer.2": 3382.3132, "encoder_q-layer.3": 3386.353, "encoder_q-layer.4": 3703.2219, "encoder_q-layer.5": 4223.5742, "encoder_q-layer.6": 5441.7544, "encoder_q-layer.7": 6316.6162, "encoder_q-layer.8": 6714.3301, "encoder_q-layer.9": 7018.0742, "epoch": 0.02, "inbatch_neg_score": 0.757, "inbatch_pos_score": 1.1953, "learning_rate": 1.6000000000000003e-05, "loss": 8.7258, "norm_diff": 1.3082, "norm_loss": 0.0, "num_token_doc": 66.642, "num_token_overlap": 11.6306, "num_token_query": 31.3103, "num_token_union": 65.0423, "num_word_context": 202.3778, "num_word_doc": 49.736, "num_word_query": 23.2338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7071.9843, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.749, "query_norm": 2.5013, "queue_k_norm": 3.8318, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3103, "sent_len_1": 66.642, "sent_len_max_0": 127.3975, "sent_len_max_1": 189.6275, "stdk": 0.0636, "stdq": 0.069, "stdqueue_k": 0.0636, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 18.1641, "active_queue_size": 16384.0, "cl_loss": 8.4426, "doc_norm": 3.7155, "encoder_q-embeddings": 9635.6797, "encoder_q-layer.0": 9087.8652, "encoder_q-layer.1": 9314.457, "encoder_q-layer.10": 38669.2227, "encoder_q-layer.11": 29025.584, "encoder_q-layer.2": 10493.6973, "encoder_q-layer.3": 8297.3555, "encoder_q-layer.4": 8832.5684, "encoder_q-layer.5": 10072.0039, "encoder_q-layer.6": 12557.0723, "encoder_q-layer.7": 16058.9727, "encoder_q-layer.8": 19920.3477, "encoder_q-layer.9": 27560.6543, "epoch": 0.02, "inbatch_neg_score": 1.0645, "inbatch_pos_score": 1.4961, "learning_rate": 1.65e-05, "loss": 8.4426, "norm_diff": 1.2769, "norm_loss": 0.0, "num_token_doc": 66.7632, "num_token_overlap": 11.639, "num_token_query": 31.2645, "num_token_union": 65.0568, "num_word_context": 202.2278, "num_word_doc": 49.8082, "num_word_query": 23.1818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24151.6344, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.0557, "query_norm": 2.4386, "queue_k_norm": 3.7071, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2645, "sent_len_1": 66.7632, "sent_len_max_0": 127.4513, "sent_len_max_1": 188.1037, "stdk": 0.0627, "stdq": 0.0686, "stdqueue_k": 0.0631, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 21.6797, "active_queue_size": 16384.0, "cl_loss": 8.1071, "doc_norm": 3.5981, "encoder_q-embeddings": 3143.1245, "encoder_q-layer.0": 2765.5493, "encoder_q-layer.1": 3685.0142, "encoder_q-layer.10": 51382.8164, "encoder_q-layer.11": 36963.1719, "encoder_q-layer.2": 5225.4922, "encoder_q-layer.3": 6453.6289, "encoder_q-layer.4": 9863.4551, "encoder_q-layer.5": 13317.2051, "encoder_q-layer.6": 18957.2148, "encoder_q-layer.7": 24215.4512, "encoder_q-layer.8": 27868.5742, "encoder_q-layer.9": 35543.625, "epoch": 0.02, "inbatch_neg_score": 0.5546, "inbatch_pos_score": 0.998, "learning_rate": 1.7000000000000003e-05, "loss": 8.1071, "norm_diff": 1.2641, "norm_loss": 0.0, "num_token_doc": 66.6347, "num_token_overlap": 11.6996, "num_token_query": 31.4032, "num_token_union": 64.9715, "num_word_context": 202.036, "num_word_doc": 49.7218, "num_word_query": 23.3187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 30400.5959, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.5469, "query_norm": 2.3339, "queue_k_norm": 3.6027, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4032, "sent_len_1": 66.6347, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.4187, "stdk": 0.0623, "stdq": 0.0661, "stdqueue_k": 0.0624, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 18.1641, "active_queue_size": 16384.0, "cl_loss": 8.0844, "doc_norm": 3.4709, "encoder_q-embeddings": 2661.2183, "encoder_q-layer.0": 2324.4736, "encoder_q-layer.1": 2858.8123, "encoder_q-layer.10": 42807.6016, "encoder_q-layer.11": 32362.2383, "encoder_q-layer.2": 3869.3831, "encoder_q-layer.3": 4979.5586, "encoder_q-layer.4": 7860.9082, "encoder_q-layer.5": 11364.4111, "encoder_q-layer.6": 16531.5703, "encoder_q-layer.7": 21211.5723, "encoder_q-layer.8": 24904.6191, "encoder_q-layer.9": 30926.375, "epoch": 0.02, "inbatch_neg_score": 0.8974, "inbatch_pos_score": 1.3359, "learning_rate": 1.75e-05, "loss": 8.0844, "norm_diff": 1.0527, "norm_loss": 0.0, "num_token_doc": 66.7019, "num_token_overlap": 11.6933, "num_token_query": 31.4113, "num_token_union": 65.0436, "num_word_context": 202.4362, "num_word_doc": 49.7331, "num_word_query": 23.3211, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26099.5277, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.8896, "query_norm": 2.4182, "queue_k_norm": 3.4958, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4113, "sent_len_1": 66.7019, "sent_len_max_0": 127.505, "sent_len_max_1": 190.2887, "stdk": 0.0619, "stdq": 0.0674, "stdqueue_k": 0.0621, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 22.7539, "active_queue_size": 16384.0, "cl_loss": 7.8684, "doc_norm": 3.3894, "encoder_q-embeddings": 1565.8269, "encoder_q-layer.0": 1310.2299, "encoder_q-layer.1": 1615.045, "encoder_q-layer.10": 17584.8203, "encoder_q-layer.11": 14488.4678, "encoder_q-layer.2": 1981.281, "encoder_q-layer.3": 2277.0435, "encoder_q-layer.4": 3097.4973, "encoder_q-layer.5": 4458.8848, "encoder_q-layer.6": 6673.9678, "encoder_q-layer.7": 8721.3711, "encoder_q-layer.8": 10544.6973, "encoder_q-layer.9": 12835.1855, "epoch": 0.02, "inbatch_neg_score": 0.9532, "inbatch_pos_score": 1.4043, "learning_rate": 1.8e-05, "loss": 7.8684, "norm_diff": 1.1241, "norm_loss": 0.0, "num_token_doc": 66.9975, "num_token_overlap": 11.7548, "num_token_query": 31.4776, "num_token_union": 65.2652, "num_word_context": 202.8111, "num_word_doc": 50.0195, "num_word_query": 23.3899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11033.9876, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.9478, "query_norm": 2.2653, "queue_k_norm": 3.4055, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4776, "sent_len_1": 66.9975, "sent_len_max_0": 127.535, "sent_len_max_1": 188.6425, "stdk": 0.0619, "stdq": 0.0633, "stdqueue_k": 0.0617, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 20.8984, "active_queue_size": 16384.0, "cl_loss": 7.5578, "doc_norm": 3.2952, "encoder_q-embeddings": 4516.6289, "encoder_q-layer.0": 4036.2883, "encoder_q-layer.1": 5457.6201, "encoder_q-layer.10": 91460.7891, "encoder_q-layer.11": 72051.0156, "encoder_q-layer.2": 7922.896, "encoder_q-layer.3": 10064.8584, "encoder_q-layer.4": 15367.6699, "encoder_q-layer.5": 22637.4062, "encoder_q-layer.6": 32067.1895, "encoder_q-layer.7": 42086.6211, "encoder_q-layer.8": 49964.0195, "encoder_q-layer.9": 61928.7383, "epoch": 0.02, "inbatch_neg_score": 0.9118, "inbatch_pos_score": 1.3652, "learning_rate": 1.85e-05, "loss": 7.5578, "norm_diff": 1.0774, "norm_loss": 0.0, "num_token_doc": 66.9491, "num_token_overlap": 11.6956, "num_token_query": 31.3917, "num_token_union": 65.1598, "num_word_context": 202.4648, "num_word_doc": 49.9344, "num_word_query": 23.3122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 54870.8567, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.8989, "query_norm": 2.2179, "queue_k_norm": 3.3163, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3917, "sent_len_1": 66.9491, "sent_len_max_0": 127.3688, "sent_len_max_1": 192.1238, "stdk": 0.0617, "stdq": 0.0623, "stdqueue_k": 0.0618, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 22.3633, "active_queue_size": 16384.0, "cl_loss": 7.5593, "doc_norm": 3.2332, "encoder_q-embeddings": 1540.3225, "encoder_q-layer.0": 1314.2546, "encoder_q-layer.1": 1609.2505, "encoder_q-layer.10": 24814.332, "encoder_q-layer.11": 21485.1543, "encoder_q-layer.2": 2226.5698, "encoder_q-layer.3": 2636.6323, "encoder_q-layer.4": 3765.7546, "encoder_q-layer.5": 5439.7832, "encoder_q-layer.6": 7543.4971, "encoder_q-layer.7": 9966.5059, "encoder_q-layer.8": 12240.584, "encoder_q-layer.9": 16575.4375, "epoch": 0.02, "inbatch_neg_score": 0.9604, "inbatch_pos_score": 1.4121, "learning_rate": 1.9e-05, "loss": 7.5593, "norm_diff": 1.0021, "norm_loss": 0.0, "num_token_doc": 66.8793, "num_token_overlap": 11.702, "num_token_query": 31.3829, "num_token_union": 65.1596, "num_word_context": 202.3027, "num_word_doc": 49.9031, "num_word_query": 23.3309, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14775.8596, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.9487, "query_norm": 2.2311, "queue_k_norm": 3.2374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3829, "sent_len_1": 66.8793, "sent_len_max_0": 127.3287, "sent_len_max_1": 190.1188, "stdk": 0.0611, "stdq": 0.0624, "stdqueue_k": 0.0616, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 21.875, "active_queue_size": 16384.0, "cl_loss": 7.3121, "doc_norm": 3.1409, "encoder_q-embeddings": 2387.832, "encoder_q-layer.0": 2121.8252, "encoder_q-layer.1": 2198.1794, "encoder_q-layer.10": 15523.2705, "encoder_q-layer.11": 13973.0586, "encoder_q-layer.2": 2588.9033, "encoder_q-layer.3": 2554.792, "encoder_q-layer.4": 3162.6292, "encoder_q-layer.5": 4233.7363, "encoder_q-layer.6": 5790.2573, "encoder_q-layer.7": 7195.2661, "encoder_q-layer.8": 8041.8369, "encoder_q-layer.9": 9525.6846, "epoch": 0.03, "inbatch_neg_score": 0.9237, "inbatch_pos_score": 1.3525, "learning_rate": 1.9500000000000003e-05, "loss": 7.3121, "norm_diff": 0.9928, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 11.6841, "num_token_query": 31.5137, "num_token_union": 65.2178, "num_word_context": 202.5761, "num_word_doc": 49.8104, "num_word_query": 23.4103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9852.744, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.9189, "query_norm": 2.1481, "queue_k_norm": 3.1581, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5137, "sent_len_1": 66.8467, "sent_len_max_0": 127.5925, "sent_len_max_1": 189.4638, "stdk": 0.0614, "stdq": 0.0593, "stdqueue_k": 0.0612, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 24.4141, "active_queue_size": 16384.0, "cl_loss": 6.9422, "doc_norm": 3.0719, "encoder_q-embeddings": 1081.2245, "encoder_q-layer.0": 919.0593, "encoder_q-layer.1": 1025.9667, "encoder_q-layer.10": 2825.134, "encoder_q-layer.11": 4537.2852, "encoder_q-layer.2": 1174.4849, "encoder_q-layer.3": 1180.0022, "encoder_q-layer.4": 1174.9288, "encoder_q-layer.5": 1322.5334, "encoder_q-layer.6": 1768.7, "encoder_q-layer.7": 2184.4985, "encoder_q-layer.8": 2311.7268, "encoder_q-layer.9": 2219.2942, "epoch": 0.03, "inbatch_neg_score": 0.77, "inbatch_pos_score": 1.207, "learning_rate": 2e-05, "loss": 6.9422, "norm_diff": 0.9991, "norm_loss": 0.0, "num_token_doc": 66.7541, "num_token_overlap": 11.6586, "num_token_query": 31.3516, "num_token_union": 65.1245, "num_word_context": 202.3762, "num_word_doc": 49.8092, "num_word_query": 23.2931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2902.3612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7598, "query_norm": 2.0727, "queue_k_norm": 3.076, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3516, "sent_len_1": 66.7541, "sent_len_max_0": 127.5563, "sent_len_max_1": 189.6562, "stdk": 0.0604, "stdq": 0.0584, "stdqueue_k": 0.061, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 22.8516, "active_queue_size": 16384.0, "cl_loss": 6.6642, "doc_norm": 2.9865, "encoder_q-embeddings": 3812.8433, "encoder_q-layer.0": 3232.4517, "encoder_q-layer.1": 3319.0693, "encoder_q-layer.10": 42728.7383, "encoder_q-layer.11": 42351.3984, "encoder_q-layer.2": 4168.7446, "encoder_q-layer.3": 4588.8789, "encoder_q-layer.4": 6194.9189, "encoder_q-layer.5": 8573.5957, "encoder_q-layer.6": 13032.1904, "encoder_q-layer.7": 18690.3066, "encoder_q-layer.8": 21995.6016, "encoder_q-layer.9": 27245.8281, "epoch": 0.03, "inbatch_neg_score": 0.7014, "inbatch_pos_score": 1.1387, "learning_rate": 2.05e-05, "loss": 6.6642, "norm_diff": 1.0255, "norm_loss": 0.0, "num_token_doc": 66.6718, "num_token_overlap": 11.6182, "num_token_query": 31.2641, "num_token_union": 65.029, "num_word_context": 202.4607, "num_word_doc": 49.7353, "num_word_query": 23.2077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27476.2331, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.6934, "query_norm": 1.961, "queue_k_norm": 2.9933, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2641, "sent_len_1": 66.6718, "sent_len_max_0": 127.4087, "sent_len_max_1": 189.7388, "stdk": 0.0598, "stdq": 0.0541, "stdqueue_k": 0.0606, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 24.0234, "active_queue_size": 16384.0, "cl_loss": 6.4472, "doc_norm": 2.8812, "encoder_q-embeddings": 2634.3372, "encoder_q-layer.0": 2273.4309, "encoder_q-layer.1": 2476.0349, "encoder_q-layer.10": 23693.5762, "encoder_q-layer.11": 27032.9668, "encoder_q-layer.2": 2968.1604, "encoder_q-layer.3": 3176.583, "encoder_q-layer.4": 3988.0405, "encoder_q-layer.5": 4996.873, "encoder_q-layer.6": 6843.0518, "encoder_q-layer.7": 9753.6787, "encoder_q-layer.8": 12535.9766, "encoder_q-layer.9": 16413.2578, "epoch": 0.03, "inbatch_neg_score": 0.7586, "inbatch_pos_score": 1.1797, "learning_rate": 2.1e-05, "loss": 6.4472, "norm_diff": 0.9823, "norm_loss": 0.0, "num_token_doc": 66.6845, "num_token_overlap": 11.6853, "num_token_query": 31.5154, "num_token_union": 65.1652, "num_word_context": 202.4728, "num_word_doc": 49.7584, "num_word_query": 23.3891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16474.0805, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7524, "query_norm": 1.8989, "queue_k_norm": 2.9, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5154, "sent_len_1": 66.6845, "sent_len_max_0": 127.6112, "sent_len_max_1": 188.5312, "stdk": 0.0604, "stdq": 0.0528, "stdqueue_k": 0.06, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 27.5391, "active_queue_size": 16384.0, "cl_loss": 6.2745, "doc_norm": 2.7916, "encoder_q-embeddings": 3891.4915, "encoder_q-layer.0": 3442.6875, "encoder_q-layer.1": 3758.0308, "encoder_q-layer.10": 7060.6611, "encoder_q-layer.11": 8817.0781, "encoder_q-layer.2": 3783.2759, "encoder_q-layer.3": 3312.7976, "encoder_q-layer.4": 3179.2649, "encoder_q-layer.5": 3247.3037, "encoder_q-layer.6": 3245.7363, "encoder_q-layer.7": 3036.708, "encoder_q-layer.8": 4246.2642, "encoder_q-layer.9": 4776.4888, "epoch": 0.03, "inbatch_neg_score": 0.5891, "inbatch_pos_score": 1.0254, "learning_rate": 2.15e-05, "loss": 6.2745, "norm_diff": 0.9203, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 11.646, "num_token_query": 31.3392, "num_token_union": 65.0724, "num_word_context": 202.024, "num_word_doc": 49.7389, "num_word_query": 23.2698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6524.9675, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.583, "query_norm": 1.8713, "queue_k_norm": 2.7995, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3392, "sent_len_1": 66.7301, "sent_len_max_0": 127.3713, "sent_len_max_1": 190.9437, "stdk": 0.0595, "stdq": 0.0526, "stdqueue_k": 0.0594, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 28.3203, "active_queue_size": 16384.0, "cl_loss": 6.0372, "doc_norm": 2.6803, "encoder_q-embeddings": 2683.7517, "encoder_q-layer.0": 2298.4778, "encoder_q-layer.1": 2452.2878, "encoder_q-layer.10": 13004.6426, "encoder_q-layer.11": 13318.4072, "encoder_q-layer.2": 2857.302, "encoder_q-layer.3": 2656.0334, "encoder_q-layer.4": 2905.5127, "encoder_q-layer.5": 3578.24, "encoder_q-layer.6": 5307.27, "encoder_q-layer.7": 6939.0752, "encoder_q-layer.8": 8438.3652, "encoder_q-layer.9": 9122.1396, "epoch": 0.03, "inbatch_neg_score": 0.4629, "inbatch_pos_score": 0.9126, "learning_rate": 2.2000000000000003e-05, "loss": 6.0372, "norm_diff": 0.8908, "norm_loss": 0.0, "num_token_doc": 66.9872, "num_token_overlap": 11.6734, "num_token_query": 31.339, "num_token_union": 65.2036, "num_word_context": 202.6783, "num_word_doc": 49.9458, "num_word_query": 23.2609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9482.7697, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4585, "query_norm": 1.7894, "queue_k_norm": 2.6999, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.339, "sent_len_1": 66.9872, "sent_len_max_0": 127.45, "sent_len_max_1": 188.7738, "stdk": 0.0583, "stdq": 0.0488, "stdqueue_k": 0.0587, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 28.9062, "active_queue_size": 16384.0, "cl_loss": 5.9345, "doc_norm": 2.6021, "encoder_q-embeddings": 10995.8574, "encoder_q-layer.0": 10205.332, "encoder_q-layer.1": 8999.7461, "encoder_q-layer.10": 3707.4041, "encoder_q-layer.11": 6483.7119, "encoder_q-layer.2": 8543.3555, "encoder_q-layer.3": 7973.6416, "encoder_q-layer.4": 5831.2817, "encoder_q-layer.5": 3775.1025, "encoder_q-layer.6": 2567.124, "encoder_q-layer.7": 2083.7346, "encoder_q-layer.8": 2284.1199, "encoder_q-layer.9": 2431.1531, "epoch": 0.03, "inbatch_neg_score": 0.5583, "inbatch_pos_score": 1.0127, "learning_rate": 2.25e-05, "loss": 5.9345, "norm_diff": 0.8006, "norm_loss": 0.0, "num_token_doc": 66.829, "num_token_overlap": 11.6605, "num_token_query": 31.3147, "num_token_union": 65.1637, "num_word_context": 202.4904, "num_word_doc": 49.8569, "num_word_query": 23.2453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10369.3202, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5537, "query_norm": 1.8015, "queue_k_norm": 2.5974, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3147, "sent_len_1": 66.829, "sent_len_max_0": 127.605, "sent_len_max_1": 189.4725, "stdk": 0.0581, "stdq": 0.049, "stdqueue_k": 0.0579, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 31.543, "active_queue_size": 16384.0, "cl_loss": 5.8117, "doc_norm": 2.4841, "encoder_q-embeddings": 1774.7433, "encoder_q-layer.0": 1444.0067, "encoder_q-layer.1": 1506.6521, "encoder_q-layer.10": 3508.2297, "encoder_q-layer.11": 5888.5488, "encoder_q-layer.2": 1613.7139, "encoder_q-layer.3": 1523.6453, "encoder_q-layer.4": 1437.568, "encoder_q-layer.5": 1386.1366, "encoder_q-layer.6": 1424.2668, "encoder_q-layer.7": 1541.0537, "encoder_q-layer.8": 1940.2738, "encoder_q-layer.9": 2237.3335, "epoch": 0.03, "inbatch_neg_score": 0.5, "inbatch_pos_score": 0.9609, "learning_rate": 2.3000000000000003e-05, "loss": 5.8117, "norm_diff": 0.6814, "norm_loss": 0.0, "num_token_doc": 66.6084, "num_token_overlap": 11.6541, "num_token_query": 31.3355, "num_token_union": 65.026, "num_word_context": 201.9814, "num_word_doc": 49.6848, "num_word_query": 23.29, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3465.3489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4929, "query_norm": 1.8026, "queue_k_norm": 2.5022, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3355, "sent_len_1": 66.6084, "sent_len_max_0": 127.5888, "sent_len_max_1": 189.78, "stdk": 0.0562, "stdq": 0.0482, "stdqueue_k": 0.0573, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 27.832, "active_queue_size": 16384.0, "cl_loss": 5.7391, "doc_norm": 2.401, "encoder_q-embeddings": 3282.4983, "encoder_q-layer.0": 2556.8765, "encoder_q-layer.1": 2748.031, "encoder_q-layer.10": 10924.252, "encoder_q-layer.11": 13244.9551, "encoder_q-layer.2": 2865.4468, "encoder_q-layer.3": 2617.0769, "encoder_q-layer.4": 2805.0747, "encoder_q-layer.5": 3605.4812, "encoder_q-layer.6": 4223.6279, "encoder_q-layer.7": 5405.3633, "encoder_q-layer.8": 6426.0693, "encoder_q-layer.9": 8196.6318, "epoch": 0.03, "inbatch_neg_score": 0.5927, "inbatch_pos_score": 1.0195, "learning_rate": 2.35e-05, "loss": 5.7391, "norm_diff": 0.6271, "norm_loss": 0.0, "num_token_doc": 66.6278, "num_token_overlap": 11.6215, "num_token_query": 31.2524, "num_token_union": 64.9984, "num_word_context": 202.1106, "num_word_doc": 49.7381, "num_word_query": 23.225, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8707.8952, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5869, "query_norm": 1.7739, "queue_k_norm": 2.4048, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2524, "sent_len_1": 66.6278, "sent_len_max_0": 127.4587, "sent_len_max_1": 188.9288, "stdk": 0.0562, "stdq": 0.0479, "stdqueue_k": 0.0564, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 28.5156, "active_queue_size": 16384.0, "cl_loss": 5.6127, "doc_norm": 2.3201, "encoder_q-embeddings": 3033.7971, "encoder_q-layer.0": 2624.8813, "encoder_q-layer.1": 2704.9353, "encoder_q-layer.10": 7851.2637, "encoder_q-layer.11": 9331.3496, "encoder_q-layer.2": 2807.6189, "encoder_q-layer.3": 2538.8862, "encoder_q-layer.4": 2799.3625, "encoder_q-layer.5": 3311.855, "encoder_q-layer.6": 4143.853, "encoder_q-layer.7": 5091.3135, "encoder_q-layer.8": 5390.7646, "encoder_q-layer.9": 5658.3662, "epoch": 0.03, "inbatch_neg_score": 0.5966, "inbatch_pos_score": 1.0557, "learning_rate": 2.4e-05, "loss": 5.6127, "norm_diff": 0.5592, "norm_loss": 0.0, "num_token_doc": 66.9269, "num_token_overlap": 11.7468, "num_token_query": 31.46, "num_token_union": 65.1955, "num_word_context": 202.4722, "num_word_doc": 49.9523, "num_word_query": 23.3624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6789.7112, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5918, "query_norm": 1.7609, "queue_k_norm": 2.3204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.46, "sent_len_1": 66.9269, "sent_len_max_0": 127.5962, "sent_len_max_1": 189.4638, "stdk": 0.0556, "stdq": 0.0472, "stdqueue_k": 0.0556, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 28.9062, "active_queue_size": 16384.0, "cl_loss": 5.5678, "doc_norm": 2.2469, "encoder_q-embeddings": 3610.1338, "encoder_q-layer.0": 3212.2539, "encoder_q-layer.1": 3346.5674, "encoder_q-layer.10": 7026.7002, "encoder_q-layer.11": 9015.0186, "encoder_q-layer.2": 3088.8479, "encoder_q-layer.3": 3240.3562, "encoder_q-layer.4": 3043.1013, "encoder_q-layer.5": 2850.6873, "encoder_q-layer.6": 3402.2876, "encoder_q-layer.7": 4109.5933, "encoder_q-layer.8": 5174.3418, "encoder_q-layer.9": 5512.8521, "epoch": 0.03, "inbatch_neg_score": 0.6017, "inbatch_pos_score": 1.0576, "learning_rate": 2.45e-05, "loss": 5.5678, "norm_diff": 0.4747, "norm_loss": 0.0, "num_token_doc": 67.099, "num_token_overlap": 11.666, "num_token_query": 31.3374, "num_token_union": 65.3402, "num_word_context": 202.8045, "num_word_doc": 50.0721, "num_word_query": 23.2801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6708.1778, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5962, "query_norm": 1.7722, "queue_k_norm": 2.2439, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3374, "sent_len_1": 67.099, "sent_len_max_0": 127.5162, "sent_len_max_1": 189.8775, "stdk": 0.0554, "stdq": 0.0466, "stdqueue_k": 0.0549, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 32.1289, "active_queue_size": 16384.0, "cl_loss": 5.5224, "doc_norm": 2.1645, "encoder_q-embeddings": 8382.5117, "encoder_q-layer.0": 6940.7241, "encoder_q-layer.1": 7267.8428, "encoder_q-layer.10": 2544.8198, "encoder_q-layer.11": 4529.2271, "encoder_q-layer.2": 4636.5688, "encoder_q-layer.3": 2963.4612, "encoder_q-layer.4": 2128.4336, "encoder_q-layer.5": 1822.9484, "encoder_q-layer.6": 1616.5182, "encoder_q-layer.7": 1675.6989, "encoder_q-layer.8": 1935.5331, "encoder_q-layer.9": 1947.9229, "epoch": 0.03, "inbatch_neg_score": 0.6269, "inbatch_pos_score": 1.083, "learning_rate": 2.5e-05, "loss": 5.5224, "norm_diff": 0.3889, "norm_loss": 0.0, "num_token_doc": 66.741, "num_token_overlap": 11.6601, "num_token_query": 31.2647, "num_token_union": 65.0242, "num_word_context": 202.0434, "num_word_doc": 49.8105, "num_word_query": 23.199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7009.6468, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6196, "query_norm": 1.7756, "queue_k_norm": 2.1741, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2647, "sent_len_1": 66.741, "sent_len_max_0": 127.4112, "sent_len_max_1": 189.0625, "stdk": 0.0536, "stdq": 0.0465, "stdqueue_k": 0.0542, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 30.5664, "active_queue_size": 16384.0, "cl_loss": 5.4238, "doc_norm": 2.103, "encoder_q-embeddings": 4867.9126, "encoder_q-layer.0": 4344.1025, "encoder_q-layer.1": 4038.1848, "encoder_q-layer.10": 6366.0176, "encoder_q-layer.11": 9264.2236, "encoder_q-layer.2": 3237.6243, "encoder_q-layer.3": 2672.4895, "encoder_q-layer.4": 2250.9624, "encoder_q-layer.5": 2150.0857, "encoder_q-layer.6": 2523.0056, "encoder_q-layer.7": 3194.8931, "encoder_q-layer.8": 4222.7822, "encoder_q-layer.9": 4992.4258, "epoch": 0.03, "inbatch_neg_score": 0.6398, "inbatch_pos_score": 1.1045, "learning_rate": 2.5500000000000003e-05, "loss": 5.4238, "norm_diff": 0.3002, "norm_loss": 0.0, "num_token_doc": 66.876, "num_token_overlap": 11.7208, "num_token_query": 31.4517, "num_token_union": 65.1783, "num_word_context": 202.1874, "num_word_doc": 49.8949, "num_word_query": 23.3673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6602.4528, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6348, "query_norm": 1.8028, "queue_k_norm": 2.1117, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4517, "sent_len_1": 66.876, "sent_len_max_0": 127.565, "sent_len_max_1": 191.3063, "stdk": 0.0532, "stdq": 0.0479, "stdqueue_k": 0.0533, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 30.0781, "active_queue_size": 16384.0, "cl_loss": 5.3424, "doc_norm": 2.0516, "encoder_q-embeddings": 3290.7883, "encoder_q-layer.0": 2949.3013, "encoder_q-layer.1": 2972.5105, "encoder_q-layer.10": 3765.1382, "encoder_q-layer.11": 5845.5996, "encoder_q-layer.2": 2722.3535, "encoder_q-layer.3": 2491.8435, "encoder_q-layer.4": 2353.9434, "encoder_q-layer.5": 2657.1389, "encoder_q-layer.6": 3106.7356, "encoder_q-layer.7": 4555.4126, "encoder_q-layer.8": 5038.7822, "encoder_q-layer.9": 4107.21, "epoch": 0.03, "inbatch_neg_score": 0.697, "inbatch_pos_score": 1.1475, "learning_rate": 2.6000000000000002e-05, "loss": 5.3424, "norm_diff": 0.2071, "norm_loss": 0.0, "num_token_doc": 67.0158, "num_token_overlap": 11.75, "num_token_query": 31.3695, "num_token_union": 65.1972, "num_word_context": 202.6654, "num_word_doc": 50.0218, "num_word_query": 23.2913, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5339.9031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6904, "query_norm": 1.8444, "queue_k_norm": 2.0608, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3695, "sent_len_1": 67.0158, "sent_len_max_0": 127.2938, "sent_len_max_1": 189.3438, "stdk": 0.0523, "stdq": 0.0473, "stdqueue_k": 0.0528, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 33.7891, "active_queue_size": 16384.0, "cl_loss": 5.3362, "doc_norm": 2.0135, "encoder_q-embeddings": 2179.3247, "encoder_q-layer.0": 1709.8868, "encoder_q-layer.1": 1767.226, "encoder_q-layer.10": 5510.3345, "encoder_q-layer.11": 7199.9932, "encoder_q-layer.2": 2023.7473, "encoder_q-layer.3": 1887.9131, "encoder_q-layer.4": 1901.2522, "encoder_q-layer.5": 1780.679, "encoder_q-layer.6": 2117.9744, "encoder_q-layer.7": 2981.0642, "encoder_q-layer.8": 3849.9148, "encoder_q-layer.9": 4665.0181, "epoch": 0.03, "inbatch_neg_score": 0.731, "inbatch_pos_score": 1.1855, "learning_rate": 2.6500000000000004e-05, "loss": 5.3362, "norm_diff": 0.1639, "norm_loss": 0.0, "num_token_doc": 66.6408, "num_token_overlap": 11.5914, "num_token_query": 31.1746, "num_token_union": 64.9995, "num_word_context": 202.0284, "num_word_doc": 49.7717, "num_word_query": 23.1547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4794.6245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7231, "query_norm": 1.8495, "queue_k_norm": 2.0183, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1746, "sent_len_1": 66.6408, "sent_len_max_0": 127.54, "sent_len_max_1": 187.475, "stdk": 0.0519, "stdq": 0.0472, "stdqueue_k": 0.0521, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 31.6406, "active_queue_size": 16384.0, "cl_loss": 5.298, "doc_norm": 1.9712, "encoder_q-embeddings": 5826.1333, "encoder_q-layer.0": 4904.832, "encoder_q-layer.1": 4644.5146, "encoder_q-layer.10": 3294.1384, "encoder_q-layer.11": 5191.9863, "encoder_q-layer.2": 4601.335, "encoder_q-layer.3": 4890.686, "encoder_q-layer.4": 4593.9531, "encoder_q-layer.5": 3705.2017, "encoder_q-layer.6": 3459.3289, "encoder_q-layer.7": 2554.0154, "encoder_q-layer.8": 2114.6318, "encoder_q-layer.9": 2097.7532, "epoch": 0.04, "inbatch_neg_score": 0.8257, "inbatch_pos_score": 1.2793, "learning_rate": 2.7000000000000002e-05, "loss": 5.298, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.7412, "num_token_overlap": 11.6641, "num_token_query": 31.3415, "num_token_union": 65.1013, "num_word_context": 202.3214, "num_word_doc": 49.8004, "num_word_query": 23.2665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6296.7883, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8184, "query_norm": 1.8673, "queue_k_norm": 1.9827, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3415, "sent_len_1": 66.7412, "sent_len_max_0": 127.5175, "sent_len_max_1": 187.6712, "stdk": 0.0509, "stdq": 0.0475, "stdqueue_k": 0.0515, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 5.2506, "doc_norm": 1.95, "encoder_q-embeddings": 2347.4265, "encoder_q-layer.0": 1883.1199, "encoder_q-layer.1": 1885.9445, "encoder_q-layer.10": 2526.5535, "encoder_q-layer.11": 5177.9697, "encoder_q-layer.2": 1921.8578, "encoder_q-layer.3": 1747.528, "encoder_q-layer.4": 1619.5402, "encoder_q-layer.5": 1419.5531, "encoder_q-layer.6": 1434.1917, "encoder_q-layer.7": 1495.6082, "encoder_q-layer.8": 1890.3459, "encoder_q-layer.9": 1847.6329, "epoch": 0.04, "inbatch_neg_score": 0.8472, "inbatch_pos_score": 1.3271, "learning_rate": 2.7500000000000004e-05, "loss": 5.2506, "norm_diff": 0.0674, "norm_loss": 0.0, "num_token_doc": 66.8616, "num_token_overlap": 11.7155, "num_token_query": 31.4859, "num_token_union": 65.2434, "num_word_context": 202.4736, "num_word_doc": 49.9025, "num_word_query": 23.3834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3471.1076, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8428, "query_norm": 1.8826, "queue_k_norm": 1.9535, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4859, "sent_len_1": 66.8616, "sent_len_max_0": 127.3812, "sent_len_max_1": 189.11, "stdk": 0.0511, "stdq": 0.0483, "stdqueue_k": 0.051, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 32.3242, "active_queue_size": 16384.0, "cl_loss": 5.156, "doc_norm": 1.9186, "encoder_q-embeddings": 1184.5355, "encoder_q-layer.0": 934.7421, "encoder_q-layer.1": 948.7661, "encoder_q-layer.10": 2141.25, "encoder_q-layer.11": 3988.3457, "encoder_q-layer.2": 1049.3114, "encoder_q-layer.3": 1080.9277, "encoder_q-layer.4": 1201.5092, "encoder_q-layer.5": 1309.6331, "encoder_q-layer.6": 1746.478, "encoder_q-layer.7": 2144.0178, "encoder_q-layer.8": 2219.8071, "encoder_q-layer.9": 1810.3354, "epoch": 0.04, "inbatch_neg_score": 0.8309, "inbatch_pos_score": 1.2822, "learning_rate": 2.8000000000000003e-05, "loss": 5.156, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.6675, "num_token_overlap": 11.7615, "num_token_query": 31.6271, "num_token_union": 65.1237, "num_word_context": 201.9516, "num_word_doc": 49.7471, "num_word_query": 23.4994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2672.9285, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8237, "query_norm": 1.897, "queue_k_norm": 1.9286, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6271, "sent_len_1": 66.6675, "sent_len_max_0": 127.5525, "sent_len_max_1": 189.1, "stdk": 0.05, "stdq": 0.048, "stdqueue_k": 0.0505, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 34.375, "active_queue_size": 16384.0, "cl_loss": 5.1755, "doc_norm": 1.9091, "encoder_q-embeddings": 1467.5647, "encoder_q-layer.0": 1089.9301, "encoder_q-layer.1": 1087.4164, "encoder_q-layer.10": 2417.717, "encoder_q-layer.11": 4228.686, "encoder_q-layer.2": 1168.0294, "encoder_q-layer.3": 1179.575, "encoder_q-layer.4": 1152.4207, "encoder_q-layer.5": 1073.1865, "encoder_q-layer.6": 1106.2563, "encoder_q-layer.7": 1186.5078, "encoder_q-layer.8": 1449.9996, "encoder_q-layer.9": 1686.0576, "epoch": 0.04, "inbatch_neg_score": 0.9129, "inbatch_pos_score": 1.3633, "learning_rate": 2.8499999999999998e-05, "loss": 5.1755, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 11.6293, "num_token_query": 31.3798, "num_token_union": 65.1819, "num_word_context": 202.3895, "num_word_doc": 49.8607, "num_word_query": 23.2868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2592.5518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9043, "query_norm": 1.9166, "queue_k_norm": 1.9114, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3798, "sent_len_1": 66.8494, "sent_len_max_0": 127.5062, "sent_len_max_1": 189.7688, "stdk": 0.0498, "stdq": 0.0471, "stdqueue_k": 0.0502, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 33.5938, "active_queue_size": 16384.0, "cl_loss": 5.1377, "doc_norm": 1.8961, "encoder_q-embeddings": 3667.2659, "encoder_q-layer.0": 2478.769, "encoder_q-layer.1": 2392.5173, "encoder_q-layer.10": 3414.8979, "encoder_q-layer.11": 5504.8643, "encoder_q-layer.2": 2586.918, "encoder_q-layer.3": 2817.4094, "encoder_q-layer.4": 2766.7268, "encoder_q-layer.5": 2960.8022, "encoder_q-layer.6": 3488.2361, "encoder_q-layer.7": 3328.3821, "encoder_q-layer.8": 3321.606, "encoder_q-layer.9": 3291.9714, "epoch": 0.04, "inbatch_neg_score": 0.961, "inbatch_pos_score": 1.4395, "learning_rate": 2.9e-05, "loss": 5.1377, "norm_diff": 0.0802, "norm_loss": 0.0, "num_token_doc": 66.9434, "num_token_overlap": 11.6625, "num_token_query": 31.2741, "num_token_union": 65.1608, "num_word_context": 202.4815, "num_word_doc": 49.9258, "num_word_query": 23.2263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4902.1939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9541, "query_norm": 1.9763, "queue_k_norm": 1.8932, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2741, "sent_len_1": 66.9434, "sent_len_max_0": 127.6125, "sent_len_max_1": 190.2575, "stdk": 0.0495, "stdq": 0.0475, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 35.4492, "active_queue_size": 16384.0, "cl_loss": 5.1032, "doc_norm": 1.8733, "encoder_q-embeddings": 1662.1508, "encoder_q-layer.0": 1361.3956, "encoder_q-layer.1": 1352.9426, "encoder_q-layer.10": 2238.2375, "encoder_q-layer.11": 4124.0337, "encoder_q-layer.2": 1431.8844, "encoder_q-layer.3": 1404.1478, "encoder_q-layer.4": 1440.5446, "encoder_q-layer.5": 1471.8928, "encoder_q-layer.6": 1882.6365, "encoder_q-layer.7": 2580.5227, "encoder_q-layer.8": 2748.886, "encoder_q-layer.9": 2097.813, "epoch": 0.04, "inbatch_neg_score": 1.0077, "inbatch_pos_score": 1.4844, "learning_rate": 2.95e-05, "loss": 5.1032, "norm_diff": 0.1043, "norm_loss": 0.0, "num_token_doc": 67.0125, "num_token_overlap": 11.6026, "num_token_query": 31.2482, "num_token_union": 65.2727, "num_word_context": 202.3216, "num_word_doc": 49.9884, "num_word_query": 23.2063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3031.0798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.999, "query_norm": 1.9775, "queue_k_norm": 1.8808, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2482, "sent_len_1": 67.0125, "sent_len_max_0": 127.6188, "sent_len_max_1": 188.61, "stdk": 0.0489, "stdq": 0.0474, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 34.8633, "active_queue_size": 16384.0, "cl_loss": 5.0579, "doc_norm": 1.8727, "encoder_q-embeddings": 1653.8768, "encoder_q-layer.0": 1300.3502, "encoder_q-layer.1": 1264.8793, "encoder_q-layer.10": 2309.7705, "encoder_q-layer.11": 4212.8477, "encoder_q-layer.2": 1346.52, "encoder_q-layer.3": 1413.8937, "encoder_q-layer.4": 1337.1744, "encoder_q-layer.5": 1194.7123, "encoder_q-layer.6": 1436.4604, "encoder_q-layer.7": 1789.8134, "encoder_q-layer.8": 2336.6194, "encoder_q-layer.9": 2136.1589, "epoch": 0.04, "inbatch_neg_score": 1.0154, "inbatch_pos_score": 1.4824, "learning_rate": 3e-05, "loss": 5.0579, "norm_diff": 0.0993, "norm_loss": 0.0, "num_token_doc": 66.7039, "num_token_overlap": 11.6769, "num_token_query": 31.4942, "num_token_union": 65.2056, "num_word_context": 202.2341, "num_word_doc": 49.8102, "num_word_query": 23.4023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2863.9133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0078, "query_norm": 1.972, "queue_k_norm": 1.8737, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4942, "sent_len_1": 66.7039, "sent_len_max_0": 127.505, "sent_len_max_1": 188.0662, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 34.4727, "active_queue_size": 16384.0, "cl_loss": 5.0124, "doc_norm": 1.8758, "encoder_q-embeddings": 27801.7832, "encoder_q-layer.0": 24864.0938, "encoder_q-layer.1": 20941.4609, "encoder_q-layer.10": 4716.3994, "encoder_q-layer.11": 8252.8779, "encoder_q-layer.2": 18684.3809, "encoder_q-layer.3": 17986.3086, "encoder_q-layer.4": 14907.3027, "encoder_q-layer.5": 14205.7773, "encoder_q-layer.6": 12041.7119, "encoder_q-layer.7": 5556.8667, "encoder_q-layer.8": 3328.3369, "encoder_q-layer.9": 2815.521, "epoch": 0.04, "inbatch_neg_score": 1.0207, "inbatch_pos_score": 1.499, "learning_rate": 3.05e-05, "loss": 5.0124, "norm_diff": 0.1609, "norm_loss": 0.0, "num_token_doc": 66.9102, "num_token_overlap": 11.7363, "num_token_query": 31.4002, "num_token_union": 65.1673, "num_word_context": 202.2808, "num_word_doc": 49.8866, "num_word_query": 23.3305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25548.2454, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.0156, "query_norm": 2.0367, "queue_k_norm": 1.8662, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4002, "sent_len_1": 66.9102, "sent_len_max_0": 127.425, "sent_len_max_1": 190.4363, "stdk": 0.0492, "stdq": 0.0481, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 5.0021, "doc_norm": 1.8542, "encoder_q-embeddings": 2901.6414, "encoder_q-layer.0": 2441.949, "encoder_q-layer.1": 2540.5623, "encoder_q-layer.10": 6496.6465, "encoder_q-layer.11": 11673.8711, "encoder_q-layer.2": 2545.7808, "encoder_q-layer.3": 2402.7629, "encoder_q-layer.4": 2342.5391, "encoder_q-layer.5": 2356.6328, "encoder_q-layer.6": 2722.175, "encoder_q-layer.7": 3212.3169, "encoder_q-layer.8": 3695.1523, "encoder_q-layer.9": 4236.9214, "epoch": 0.04, "inbatch_neg_score": 1.0878, "inbatch_pos_score": 1.5488, "learning_rate": 3.1e-05, "loss": 5.0021, "norm_diff": 0.1895, "norm_loss": 0.0, "num_token_doc": 66.6154, "num_token_overlap": 11.6939, "num_token_query": 31.4433, "num_token_union": 65.0543, "num_word_context": 202.0886, "num_word_doc": 49.6991, "num_word_query": 23.3394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6594.7852, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.0801, "query_norm": 2.0436, "queue_k_norm": 1.8649, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4433, "sent_len_1": 66.6154, "sent_len_max_0": 127.3838, "sent_len_max_1": 190.5662, "stdk": 0.0482, "stdq": 0.0466, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 35.6445, "active_queue_size": 16384.0, "cl_loss": 4.9258, "doc_norm": 1.8503, "encoder_q-embeddings": 6062.8589, "encoder_q-layer.0": 4682.5796, "encoder_q-layer.1": 4614.8965, "encoder_q-layer.10": 4539.3584, "encoder_q-layer.11": 9149.998, "encoder_q-layer.2": 4398.3584, "encoder_q-layer.3": 4503.5239, "encoder_q-layer.4": 3675.4639, "encoder_q-layer.5": 3081.7556, "encoder_q-layer.6": 2730.0962, "encoder_q-layer.7": 2290.1528, "encoder_q-layer.8": 2499.9453, "encoder_q-layer.9": 2657.3489, "epoch": 0.04, "inbatch_neg_score": 1.0948, "inbatch_pos_score": 1.5654, "learning_rate": 3.15e-05, "loss": 4.9258, "norm_diff": 0.2917, "norm_loss": 0.0, "num_token_doc": 66.5839, "num_token_overlap": 11.7312, "num_token_query": 31.5477, "num_token_union": 65.0657, "num_word_context": 202.2379, "num_word_doc": 49.7011, "num_word_query": 23.4477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7053.6778, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.084, "query_norm": 2.142, "queue_k_norm": 1.8588, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5477, "sent_len_1": 66.5839, "sent_len_max_0": 127.5325, "sent_len_max_1": 188.7725, "stdk": 0.048, "stdq": 0.0476, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 34.8633, "active_queue_size": 16384.0, "cl_loss": 4.9113, "doc_norm": 1.8491, "encoder_q-embeddings": 11500.1738, "encoder_q-layer.0": 9294.6738, "encoder_q-layer.1": 9984.6436, "encoder_q-layer.10": 4253.0815, "encoder_q-layer.11": 7441.7515, "encoder_q-layer.2": 9701.9443, "encoder_q-layer.3": 6983.2881, "encoder_q-layer.4": 5682.0669, "encoder_q-layer.5": 3631.8806, "encoder_q-layer.6": 2905.4124, "encoder_q-layer.7": 2656.8345, "encoder_q-layer.8": 3257.835, "encoder_q-layer.9": 3165.9868, "epoch": 0.04, "inbatch_neg_score": 1.1366, "inbatch_pos_score": 1.6016, "learning_rate": 3.2000000000000005e-05, "loss": 4.9113, "norm_diff": 0.345, "norm_loss": 0.0, "num_token_doc": 66.8384, "num_token_overlap": 11.697, "num_token_query": 31.4694, "num_token_union": 65.1968, "num_word_context": 202.5136, "num_word_doc": 49.8827, "num_word_query": 23.3904, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10800.7524, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.124, "query_norm": 2.1942, "queue_k_norm": 1.8568, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4694, "sent_len_1": 66.8384, "sent_len_max_0": 127.4862, "sent_len_max_1": 189.315, "stdk": 0.0477, "stdq": 0.0463, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 33.3008, "active_queue_size": 16384.0, "cl_loss": 4.9036, "doc_norm": 1.8642, "encoder_q-embeddings": 3722.2539, "encoder_q-layer.0": 2688.3477, "encoder_q-layer.1": 2699.0483, "encoder_q-layer.10": 3904.8701, "encoder_q-layer.11": 7042.9229, "encoder_q-layer.2": 2961.7651, "encoder_q-layer.3": 2775.2715, "encoder_q-layer.4": 2656.4636, "encoder_q-layer.5": 2545.8303, "encoder_q-layer.6": 2660.0986, "encoder_q-layer.7": 2670.783, "encoder_q-layer.8": 3169.6448, "encoder_q-layer.9": 3035.3555, "epoch": 0.04, "inbatch_neg_score": 1.215, "inbatch_pos_score": 1.6953, "learning_rate": 3.2500000000000004e-05, "loss": 4.9036, "norm_diff": 0.4218, "norm_loss": 0.0, "num_token_doc": 66.8876, "num_token_overlap": 11.6243, "num_token_query": 31.2884, "num_token_union": 65.1678, "num_word_context": 202.3212, "num_word_doc": 49.8658, "num_word_query": 23.2525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5039.3895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2002, "query_norm": 2.286, "queue_k_norm": 1.8563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2884, "sent_len_1": 66.8876, "sent_len_max_0": 127.5263, "sent_len_max_1": 190.2537, "stdk": 0.048, "stdq": 0.0476, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 34.1797, "active_queue_size": 16384.0, "cl_loss": 4.8466, "doc_norm": 1.861, "encoder_q-embeddings": 6090.8115, "encoder_q-layer.0": 4703.2866, "encoder_q-layer.1": 4700.3711, "encoder_q-layer.10": 4260.2598, "encoder_q-layer.11": 8465.9727, "encoder_q-layer.2": 4242.9429, "encoder_q-layer.3": 3879.8872, "encoder_q-layer.4": 3360.7556, "encoder_q-layer.5": 3126.698, "encoder_q-layer.6": 3293.4922, "encoder_q-layer.7": 3475.0454, "encoder_q-layer.8": 4137.2324, "encoder_q-layer.9": 3713.209, "epoch": 0.04, "inbatch_neg_score": 1.1755, "inbatch_pos_score": 1.6465, "learning_rate": 3.3e-05, "loss": 4.8466, "norm_diff": 0.3999, "norm_loss": 0.0, "num_token_doc": 66.9518, "num_token_overlap": 11.6617, "num_token_query": 31.3497, "num_token_union": 65.1864, "num_word_context": 202.6086, "num_word_doc": 49.9345, "num_word_query": 23.2473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7008.0039, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1611, "query_norm": 2.2609, "queue_k_norm": 1.8593, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3497, "sent_len_1": 66.9518, "sent_len_max_0": 127.3487, "sent_len_max_1": 192.315, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 36.5234, "active_queue_size": 16384.0, "cl_loss": 4.8217, "doc_norm": 1.8558, "encoder_q-embeddings": 7279.918, "encoder_q-layer.0": 5524.2661, "encoder_q-layer.1": 5273.0376, "encoder_q-layer.10": 4322.2529, "encoder_q-layer.11": 7547.188, "encoder_q-layer.2": 5685.8516, "encoder_q-layer.3": 5572.2881, "encoder_q-layer.4": 5229.5229, "encoder_q-layer.5": 4919.8403, "encoder_q-layer.6": 4770.1675, "encoder_q-layer.7": 3257.6328, "encoder_q-layer.8": 3016.2441, "encoder_q-layer.9": 2865.7554, "epoch": 0.04, "inbatch_neg_score": 1.1763, "inbatch_pos_score": 1.6826, "learning_rate": 3.35e-05, "loss": 4.8217, "norm_diff": 0.5036, "norm_loss": 0.0, "num_token_doc": 66.9665, "num_token_overlap": 11.7227, "num_token_query": 31.4261, "num_token_union": 65.2248, "num_word_context": 202.23, "num_word_doc": 49.9698, "num_word_query": 23.3295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7863.5989, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1641, "query_norm": 2.3595, "queue_k_norm": 1.8585, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4261, "sent_len_1": 66.9665, "sent_len_max_0": 127.3312, "sent_len_max_1": 188.5575, "stdk": 0.0475, "stdq": 0.0483, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 34.2773, "active_queue_size": 16384.0, "cl_loss": 4.8149, "doc_norm": 1.863, "encoder_q-embeddings": 4902.8101, "encoder_q-layer.0": 3873.5583, "encoder_q-layer.1": 3659.4954, "encoder_q-layer.10": 3871.7305, "encoder_q-layer.11": 8045.2891, "encoder_q-layer.2": 3895.0454, "encoder_q-layer.3": 3696.6887, "encoder_q-layer.4": 3193.4084, "encoder_q-layer.5": 2852.6001, "encoder_q-layer.6": 2803.1931, "encoder_q-layer.7": 2318.9771, "encoder_q-layer.8": 2873.8953, "encoder_q-layer.9": 2854.7161, "epoch": 0.04, "inbatch_neg_score": 1.1329, "inbatch_pos_score": 1.6016, "learning_rate": 3.4000000000000007e-05, "loss": 4.8149, "norm_diff": 0.4712, "norm_loss": 0.0, "num_token_doc": 66.7789, "num_token_overlap": 11.6396, "num_token_query": 31.3127, "num_token_union": 65.1204, "num_word_context": 202.4905, "num_word_doc": 49.8516, "num_word_query": 23.2735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6088.8041, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.1152, "query_norm": 2.3342, "queue_k_norm": 1.8566, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3127, "sent_len_1": 66.7789, "sent_len_max_0": 127.5, "sent_len_max_1": 191.0062, "stdk": 0.0476, "stdq": 0.0475, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 35.8398, "active_queue_size": 16384.0, "cl_loss": 4.7699, "doc_norm": 1.8625, "encoder_q-embeddings": 5084.0776, "encoder_q-layer.0": 4198.2554, "encoder_q-layer.1": 4078.2051, "encoder_q-layer.10": 5388.4805, "encoder_q-layer.11": 11759.7432, "encoder_q-layer.2": 4158.7446, "encoder_q-layer.3": 4014.4949, "encoder_q-layer.4": 3638.0649, "encoder_q-layer.5": 3245.655, "encoder_q-layer.6": 3294.0935, "encoder_q-layer.7": 3352.5266, "encoder_q-layer.8": 3070.9434, "encoder_q-layer.9": 3004.469, "epoch": 0.04, "inbatch_neg_score": 1.0211, "inbatch_pos_score": 1.5, "learning_rate": 3.45e-05, "loss": 4.7699, "norm_diff": 0.4947, "norm_loss": 0.0, "num_token_doc": 66.7463, "num_token_overlap": 11.7767, "num_token_query": 31.6914, "num_token_union": 65.2167, "num_word_context": 202.1102, "num_word_doc": 49.7967, "num_word_query": 23.5873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7478.8606, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.0059, "query_norm": 2.3573, "queue_k_norm": 1.8551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.6914, "sent_len_1": 66.7463, "sent_len_max_0": 127.7387, "sent_len_max_1": 188.555, "stdk": 0.0476, "stdq": 0.0476, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 34.1797, "active_queue_size": 16384.0, "cl_loss": 4.7867, "doc_norm": 1.8455, "encoder_q-embeddings": 3537.5457, "encoder_q-layer.0": 2733.5645, "encoder_q-layer.1": 2665.5947, "encoder_q-layer.10": 3372.9927, "encoder_q-layer.11": 6816.6479, "encoder_q-layer.2": 2835.6655, "encoder_q-layer.3": 2774.0759, "encoder_q-layer.4": 2808.0178, "encoder_q-layer.5": 2496.2817, "encoder_q-layer.6": 2723.1951, "encoder_q-layer.7": 2550.6824, "encoder_q-layer.8": 2907.8145, "encoder_q-layer.9": 2599.9182, "epoch": 0.05, "inbatch_neg_score": 0.9712, "inbatch_pos_score": 1.4561, "learning_rate": 3.5e-05, "loss": 4.7867, "norm_diff": 0.5041, "norm_loss": 0.0, "num_token_doc": 66.545, "num_token_overlap": 11.6437, "num_token_query": 31.4427, "num_token_union": 65.0423, "num_word_context": 202.1545, "num_word_doc": 49.6914, "num_word_query": 23.3605, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4932.4581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9541, "query_norm": 2.3496, "queue_k_norm": 1.8499, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4427, "sent_len_1": 66.545, "sent_len_max_0": 127.5487, "sent_len_max_1": 189.1475, "stdk": 0.0469, "stdq": 0.0472, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.783, "doc_norm": 1.8434, "encoder_q-embeddings": 16130.9658, "encoder_q-layer.0": 12861.6357, "encoder_q-layer.1": 11078.2539, "encoder_q-layer.10": 4000.7134, "encoder_q-layer.11": 8125.77, "encoder_q-layer.2": 12751.6816, "encoder_q-layer.3": 10625.6064, "encoder_q-layer.4": 9470.6348, "encoder_q-layer.5": 7339.6191, "encoder_q-layer.6": 7575.8818, "encoder_q-layer.7": 5023.1104, "encoder_q-layer.8": 2977.9185, "encoder_q-layer.9": 2714.1194, "epoch": 0.05, "inbatch_neg_score": 0.873, "inbatch_pos_score": 1.3691, "learning_rate": 3.55e-05, "loss": 4.783, "norm_diff": 0.5243, "norm_loss": 0.0, "num_token_doc": 66.7249, "num_token_overlap": 11.6801, "num_token_query": 31.3526, "num_token_union": 65.0723, "num_word_context": 202.1514, "num_word_doc": 49.7657, "num_word_query": 23.279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14926.2056, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8555, "query_norm": 2.3677, "queue_k_norm": 1.8477, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3526, "sent_len_1": 66.7249, "sent_len_max_0": 127.4887, "sent_len_max_1": 189.6912, "stdk": 0.047, "stdq": 0.0477, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 35.5469, "active_queue_size": 16384.0, "cl_loss": 4.7622, "doc_norm": 1.8342, "encoder_q-embeddings": 5927.6982, "encoder_q-layer.0": 5212.7964, "encoder_q-layer.1": 4289.3291, "encoder_q-layer.10": 4686.666, "encoder_q-layer.11": 8106.6987, "encoder_q-layer.2": 3874.9753, "encoder_q-layer.3": 3728.9648, "encoder_q-layer.4": 3361.4404, "encoder_q-layer.5": 2834.575, "encoder_q-layer.6": 2908.1714, "encoder_q-layer.7": 3493.5591, "encoder_q-layer.8": 4277.7236, "encoder_q-layer.9": 3769.5771, "epoch": 0.05, "inbatch_neg_score": 0.8126, "inbatch_pos_score": 1.2725, "learning_rate": 3.6e-05, "loss": 4.7622, "norm_diff": 0.5978, "norm_loss": 0.0, "num_token_doc": 66.8239, "num_token_overlap": 11.6478, "num_token_query": 31.2795, "num_token_union": 65.1112, "num_word_context": 202.2087, "num_word_doc": 49.9108, "num_word_query": 23.2288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6892.1222, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7935, "query_norm": 2.432, "queue_k_norm": 1.8373, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2795, "sent_len_1": 66.8239, "sent_len_max_0": 127.5575, "sent_len_max_1": 187.305, "stdk": 0.0468, "stdq": 0.0464, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 36.7188, "active_queue_size": 16384.0, "cl_loss": 4.7395, "doc_norm": 1.8316, "encoder_q-embeddings": 2980.1296, "encoder_q-layer.0": 2317.9312, "encoder_q-layer.1": 2347.2954, "encoder_q-layer.10": 4201.9014, "encoder_q-layer.11": 8074.5288, "encoder_q-layer.2": 2586.4954, "encoder_q-layer.3": 2501.478, "encoder_q-layer.4": 2571.0068, "encoder_q-layer.5": 2395.7461, "encoder_q-layer.6": 2431.7671, "encoder_q-layer.7": 2399.3115, "encoder_q-layer.8": 2808.8726, "encoder_q-layer.9": 2951.2305, "epoch": 0.05, "inbatch_neg_score": 0.7922, "inbatch_pos_score": 1.293, "learning_rate": 3.65e-05, "loss": 4.7395, "norm_diff": 0.6743, "norm_loss": 0.0, "num_token_doc": 66.7715, "num_token_overlap": 11.607, "num_token_query": 31.1317, "num_token_union": 65.0892, "num_word_context": 202.3901, "num_word_doc": 49.808, "num_word_query": 23.1219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4936.4764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.772, "query_norm": 2.5059, "queue_k_norm": 1.8276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1317, "sent_len_1": 66.7715, "sent_len_max_0": 127.1912, "sent_len_max_1": 187.5613, "stdk": 0.0473, "stdq": 0.0482, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 37.4023, "active_queue_size": 16384.0, "cl_loss": 4.7088, "doc_norm": 1.8216, "encoder_q-embeddings": 2456.625, "encoder_q-layer.0": 1706.4325, "encoder_q-layer.1": 1859.4668, "encoder_q-layer.10": 3304.0171, "encoder_q-layer.11": 7112.3008, "encoder_q-layer.2": 1927.7881, "encoder_q-layer.3": 1835.2854, "encoder_q-layer.4": 1844.8375, "encoder_q-layer.5": 1717.1879, "encoder_q-layer.6": 1799.7545, "encoder_q-layer.7": 2059.4368, "encoder_q-layer.8": 2661.0125, "encoder_q-layer.9": 2626.4727, "epoch": 0.05, "inbatch_neg_score": 0.7071, "inbatch_pos_score": 1.1904, "learning_rate": 3.7e-05, "loss": 4.7088, "norm_diff": 0.6631, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 11.6507, "num_token_query": 31.2976, "num_token_union": 65.0441, "num_word_context": 202.2795, "num_word_doc": 49.8067, "num_word_query": 23.2316, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4250.9468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.686, "query_norm": 2.4847, "queue_k_norm": 1.8176, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2976, "sent_len_1": 66.7393, "sent_len_max_0": 127.3037, "sent_len_max_1": 190.1838, "stdk": 0.0471, "stdq": 0.0468, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 35.0586, "active_queue_size": 16384.0, "cl_loss": 4.6589, "doc_norm": 1.81, "encoder_q-embeddings": 22320.1875, "encoder_q-layer.0": 18773.2461, "encoder_q-layer.1": 15378.4502, "encoder_q-layer.10": 5268.457, "encoder_q-layer.11": 9583.8955, "encoder_q-layer.2": 13521.5215, "encoder_q-layer.3": 9243.1943, "encoder_q-layer.4": 5560.6187, "encoder_q-layer.5": 3377.6062, "encoder_q-layer.6": 2651.8074, "encoder_q-layer.7": 2494.2261, "encoder_q-layer.8": 2852.2605, "encoder_q-layer.9": 2940.0303, "epoch": 0.05, "inbatch_neg_score": 0.7651, "inbatch_pos_score": 1.2383, "learning_rate": 3.7500000000000003e-05, "loss": 4.6589, "norm_diff": 0.8285, "norm_loss": 0.0, "num_token_doc": 66.7043, "num_token_overlap": 11.6763, "num_token_query": 31.425, "num_token_union": 65.1218, "num_word_context": 202.2365, "num_word_doc": 49.7814, "num_word_query": 23.3558, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18032.702, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7412, "query_norm": 2.6385, "queue_k_norm": 1.8076, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.425, "sent_len_1": 66.7043, "sent_len_max_0": 127.6063, "sent_len_max_1": 191.5737, "stdk": 0.0469, "stdq": 0.0468, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.6767, "doc_norm": 1.8014, "encoder_q-embeddings": 2457.8413, "encoder_q-layer.0": 1836.8101, "encoder_q-layer.1": 1745.0715, "encoder_q-layer.10": 3422.1384, "encoder_q-layer.11": 6246.1094, "encoder_q-layer.2": 1994.0647, "encoder_q-layer.3": 1944.7253, "encoder_q-layer.4": 1959.2167, "encoder_q-layer.5": 1867.0742, "encoder_q-layer.6": 1945.8411, "encoder_q-layer.7": 2152.0049, "encoder_q-layer.8": 2722.6003, "encoder_q-layer.9": 2469.8428, "epoch": 0.05, "inbatch_neg_score": 0.7133, "inbatch_pos_score": 1.1992, "learning_rate": 3.8e-05, "loss": 4.6767, "norm_diff": 0.8368, "norm_loss": 0.0, "num_token_doc": 66.7979, "num_token_overlap": 11.7264, "num_token_query": 31.4433, "num_token_union": 65.1084, "num_word_context": 202.249, "num_word_doc": 49.8141, "num_word_query": 23.361, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4088.9365, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.689, "query_norm": 2.6382, "queue_k_norm": 1.799, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4433, "sent_len_1": 66.7979, "sent_len_max_0": 127.49, "sent_len_max_1": 191.725, "stdk": 0.047, "stdq": 0.0473, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.6676, "doc_norm": 1.7842, "encoder_q-embeddings": 4803.1616, "encoder_q-layer.0": 3701.6729, "encoder_q-layer.1": 3531.9299, "encoder_q-layer.10": 7646.2705, "encoder_q-layer.11": 20268.5488, "encoder_q-layer.2": 3692.0994, "encoder_q-layer.3": 3365.3477, "encoder_q-layer.4": 3065.0095, "encoder_q-layer.5": 2876.6284, "encoder_q-layer.6": 2687.0515, "encoder_q-layer.7": 3078.3374, "encoder_q-layer.8": 3787.2104, "encoder_q-layer.9": 3710.8411, "epoch": 0.05, "inbatch_neg_score": 0.7483, "inbatch_pos_score": 1.2207, "learning_rate": 3.85e-05, "loss": 4.6676, "norm_diff": 0.8213, "norm_loss": 0.0, "num_token_doc": 66.8998, "num_token_overlap": 11.633, "num_token_query": 31.3267, "num_token_union": 65.2314, "num_word_context": 202.4358, "num_word_doc": 49.8958, "num_word_query": 23.2655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10735.7981, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7256, "query_norm": 2.6054, "queue_k_norm": 1.7909, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3267, "sent_len_1": 66.8998, "sent_len_max_0": 127.51, "sent_len_max_1": 191.7612, "stdk": 0.0466, "stdq": 0.047, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.6632, "doc_norm": 1.7814, "encoder_q-embeddings": 7587.0239, "encoder_q-layer.0": 5659.8154, "encoder_q-layer.1": 4707.5669, "encoder_q-layer.10": 3338.5166, "encoder_q-layer.11": 7611.4033, "encoder_q-layer.2": 4581.1528, "encoder_q-layer.3": 3203.4282, "encoder_q-layer.4": 3065.7664, "encoder_q-layer.5": 2362.9224, "encoder_q-layer.6": 2264.4236, "encoder_q-layer.7": 2447.615, "encoder_q-layer.8": 3125.2312, "encoder_q-layer.9": 2888.9067, "epoch": 0.05, "inbatch_neg_score": 0.7426, "inbatch_pos_score": 1.2217, "learning_rate": 3.9000000000000006e-05, "loss": 4.6632, "norm_diff": 0.7604, "norm_loss": 0.0, "num_token_doc": 66.7754, "num_token_overlap": 11.6063, "num_token_query": 31.1814, "num_token_union": 65.0361, "num_word_context": 202.0387, "num_word_doc": 49.8549, "num_word_query": 23.1215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6942.9286, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7231, "query_norm": 2.5418, "queue_k_norm": 1.7825, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.1814, "sent_len_1": 66.7754, "sent_len_max_0": 127.395, "sent_len_max_1": 187.9275, "stdk": 0.0468, "stdq": 0.0454, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 38.3789, "active_queue_size": 16384.0, "cl_loss": 4.6464, "doc_norm": 1.7764, "encoder_q-embeddings": 4204.8774, "encoder_q-layer.0": 3330.2695, "encoder_q-layer.1": 3106.5403, "encoder_q-layer.10": 5022.9614, "encoder_q-layer.11": 10395.9268, "encoder_q-layer.2": 3539.5605, "encoder_q-layer.3": 3353.8557, "encoder_q-layer.4": 2986.2612, "encoder_q-layer.5": 2760.3311, "encoder_q-layer.6": 2671.2476, "encoder_q-layer.7": 2590.7007, "encoder_q-layer.8": 2905.7954, "encoder_q-layer.9": 2883.0317, "epoch": 0.05, "inbatch_neg_score": 0.7486, "inbatch_pos_score": 1.2314, "learning_rate": 3.9500000000000005e-05, "loss": 4.6464, "norm_diff": 0.7327, "norm_loss": 0.0, "num_token_doc": 66.7469, "num_token_overlap": 11.6144, "num_token_query": 31.2607, "num_token_union": 65.0743, "num_word_context": 201.9954, "num_word_doc": 49.8137, "num_word_query": 23.2, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6399.699, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7305, "query_norm": 2.5091, "queue_k_norm": 1.7734, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2607, "sent_len_1": 66.7469, "sent_len_max_0": 127.5062, "sent_len_max_1": 189.6438, "stdk": 0.0467, "stdq": 0.046, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.5885, "doc_norm": 1.7663, "encoder_q-embeddings": 2018.9476, "encoder_q-layer.0": 1435.5804, "encoder_q-layer.1": 1419.2423, "encoder_q-layer.10": 3532.3079, "encoder_q-layer.11": 7462.2617, "encoder_q-layer.2": 1543.7789, "encoder_q-layer.3": 1611.6761, "encoder_q-layer.4": 1628.3835, "encoder_q-layer.5": 1584.6781, "encoder_q-layer.6": 1892.5702, "encoder_q-layer.7": 1993.8486, "encoder_q-layer.8": 2282.1199, "encoder_q-layer.9": 2266.6711, "epoch": 0.05, "inbatch_neg_score": 0.6333, "inbatch_pos_score": 1.125, "learning_rate": 4e-05, "loss": 4.5885, "norm_diff": 0.6961, "norm_loss": 0.0, "num_token_doc": 66.86, "num_token_overlap": 11.6993, "num_token_query": 31.3564, "num_token_union": 65.1006, "num_word_context": 202.3566, "num_word_doc": 49.8701, "num_word_query": 23.2895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4198.0975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6143, "query_norm": 2.4624, "queue_k_norm": 1.7701, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3564, "sent_len_1": 66.86, "sent_len_max_0": 127.5362, "sent_len_max_1": 188.9787, "stdk": 0.0467, "stdq": 0.046, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.5805, "doc_norm": 1.7578, "encoder_q-embeddings": 5699.5166, "encoder_q-layer.0": 4223.9375, "encoder_q-layer.1": 4218.8105, "encoder_q-layer.10": 5487.5024, "encoder_q-layer.11": 12668.9463, "encoder_q-layer.2": 4863.8267, "encoder_q-layer.3": 4677.6143, "encoder_q-layer.4": 4865.3262, "encoder_q-layer.5": 4564.8589, "encoder_q-layer.6": 4660.0771, "encoder_q-layer.7": 3936.9114, "encoder_q-layer.8": 4163.5903, "encoder_q-layer.9": 4064.0869, "epoch": 0.05, "inbatch_neg_score": 0.642, "inbatch_pos_score": 1.1309, "learning_rate": 4.05e-05, "loss": 4.5805, "norm_diff": 0.8103, "norm_loss": 0.0, "num_token_doc": 66.7564, "num_token_overlap": 11.6609, "num_token_query": 31.3006, "num_token_union": 65.06, "num_word_context": 202.4528, "num_word_doc": 49.8096, "num_word_query": 23.2635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8581.2902, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6216, "query_norm": 2.5681, "queue_k_norm": 1.7608, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3006, "sent_len_1": 66.7564, "sent_len_max_0": 127.5325, "sent_len_max_1": 190.0825, "stdk": 0.0465, "stdq": 0.0468, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.5436, "doc_norm": 1.7535, "encoder_q-embeddings": 2487.1294, "encoder_q-layer.0": 1667.4379, "encoder_q-layer.1": 1672.9951, "encoder_q-layer.10": 3438.2979, "encoder_q-layer.11": 7610.3125, "encoder_q-layer.2": 1813.5619, "encoder_q-layer.3": 1813.5612, "encoder_q-layer.4": 1838.187, "encoder_q-layer.5": 1724.4845, "encoder_q-layer.6": 1877.8514, "encoder_q-layer.7": 2103.2661, "encoder_q-layer.8": 2893.9243, "encoder_q-layer.9": 2621.1221, "epoch": 0.05, "inbatch_neg_score": 0.717, "inbatch_pos_score": 1.2402, "learning_rate": 4.1e-05, "loss": 4.5436, "norm_diff": 0.9323, "norm_loss": 0.0, "num_token_doc": 66.8685, "num_token_overlap": 11.6426, "num_token_query": 31.2729, "num_token_union": 65.1718, "num_word_context": 202.0522, "num_word_doc": 49.9144, "num_word_query": 23.2333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4438.5089, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6934, "query_norm": 2.6858, "queue_k_norm": 1.7554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2729, "sent_len_1": 66.8685, "sent_len_max_0": 127.39, "sent_len_max_1": 189.595, "stdk": 0.0467, "stdq": 0.0477, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.5328, "doc_norm": 1.7505, "encoder_q-embeddings": 3497.2444, "encoder_q-layer.0": 2630.0327, "encoder_q-layer.1": 2554.1794, "encoder_q-layer.10": 2979.3076, "encoder_q-layer.11": 6733.5601, "encoder_q-layer.2": 2929.7124, "encoder_q-layer.3": 2815.7778, "encoder_q-layer.4": 2744.3337, "encoder_q-layer.5": 2515.5554, "encoder_q-layer.6": 2480.0989, "encoder_q-layer.7": 2258.7061, "encoder_q-layer.8": 2511.6912, "encoder_q-layer.9": 2243.4922, "epoch": 0.05, "inbatch_neg_score": 0.7742, "inbatch_pos_score": 1.2578, "learning_rate": 4.15e-05, "loss": 4.5328, "norm_diff": 0.9863, "norm_loss": 0.0, "num_token_doc": 66.9018, "num_token_overlap": 11.7167, "num_token_query": 31.4272, "num_token_union": 65.2042, "num_word_context": 202.0932, "num_word_doc": 49.9184, "num_word_query": 23.3277, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4860.2584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.749, "query_norm": 2.7368, "queue_k_norm": 1.7473, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4272, "sent_len_1": 66.9018, "sent_len_max_0": 127.3462, "sent_len_max_1": 189.6525, "stdk": 0.0467, "stdq": 0.0457, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.5262, "doc_norm": 1.7457, "encoder_q-embeddings": 2229.9231, "encoder_q-layer.0": 1608.4469, "encoder_q-layer.1": 1643.6974, "encoder_q-layer.10": 3550.2095, "encoder_q-layer.11": 8303.1924, "encoder_q-layer.2": 1788.0767, "encoder_q-layer.3": 1764.0435, "encoder_q-layer.4": 1840.5707, "encoder_q-layer.5": 1652.843, "encoder_q-layer.6": 1791.2979, "encoder_q-layer.7": 1901.3605, "encoder_q-layer.8": 2346.1619, "encoder_q-layer.9": 2479.9067, "epoch": 0.05, "inbatch_neg_score": 0.8038, "inbatch_pos_score": 1.3057, "learning_rate": 4.2e-05, "loss": 4.5262, "norm_diff": 0.9252, "norm_loss": 0.0, "num_token_doc": 66.5681, "num_token_overlap": 11.6482, "num_token_query": 31.2805, "num_token_union": 64.9893, "num_word_context": 202.1671, "num_word_doc": 49.691, "num_word_query": 23.2366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4584.9224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7832, "query_norm": 2.6709, "queue_k_norm": 1.7455, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2805, "sent_len_1": 66.5681, "sent_len_max_0": 127.4813, "sent_len_max_1": 190.22, "stdk": 0.0468, "stdq": 0.0463, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.5423, "doc_norm": 1.7401, "encoder_q-embeddings": 3129.3745, "encoder_q-layer.0": 2528.1296, "encoder_q-layer.1": 2287.0105, "encoder_q-layer.10": 4369.978, "encoder_q-layer.11": 9558.5205, "encoder_q-layer.2": 2349.8083, "encoder_q-layer.3": 2274.7056, "encoder_q-layer.4": 2065.0811, "encoder_q-layer.5": 1854.2858, "encoder_q-layer.6": 1943.1779, "encoder_q-layer.7": 2028.038, "encoder_q-layer.8": 2302.7688, "encoder_q-layer.9": 2305.3203, "epoch": 0.06, "inbatch_neg_score": 0.7763, "inbatch_pos_score": 1.2832, "learning_rate": 4.25e-05, "loss": 4.5423, "norm_diff": 0.849, "norm_loss": 0.0, "num_token_doc": 66.8017, "num_token_overlap": 11.6952, "num_token_query": 31.3233, "num_token_union": 65.1121, "num_word_context": 202.2346, "num_word_doc": 49.8827, "num_word_query": 23.2544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5416.4523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7549, "query_norm": 2.5892, "queue_k_norm": 1.7416, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3233, "sent_len_1": 66.8017, "sent_len_max_0": 127.365, "sent_len_max_1": 189.6838, "stdk": 0.0465, "stdq": 0.0467, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.5499, "doc_norm": 1.737, "encoder_q-embeddings": 1765.1085, "encoder_q-layer.0": 1248.7219, "encoder_q-layer.1": 1249.2216, "encoder_q-layer.10": 2558.8049, "encoder_q-layer.11": 5983.9775, "encoder_q-layer.2": 1413.6884, "encoder_q-layer.3": 1488.5841, "encoder_q-layer.4": 1518.657, "encoder_q-layer.5": 1504.3518, "encoder_q-layer.6": 1657.342, "encoder_q-layer.7": 1841.7195, "encoder_q-layer.8": 2182.46, "encoder_q-layer.9": 1936.7987, "epoch": 0.06, "inbatch_neg_score": 0.7528, "inbatch_pos_score": 1.2402, "learning_rate": 4.3e-05, "loss": 4.5499, "norm_diff": 0.6761, "norm_loss": 0.0, "num_token_doc": 66.8618, "num_token_overlap": 11.7339, "num_token_query": 31.4526, "num_token_union": 65.1296, "num_word_context": 201.9455, "num_word_doc": 49.8262, "num_word_query": 23.336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3644.4483, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7329, "query_norm": 2.4132, "queue_k_norm": 1.7378, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4526, "sent_len_1": 66.8618, "sent_len_max_0": 127.3225, "sent_len_max_1": 191.2287, "stdk": 0.0465, "stdq": 0.0456, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 36.7188, "active_queue_size": 16384.0, "cl_loss": 4.5719, "doc_norm": 1.7347, "encoder_q-embeddings": 4327.4741, "encoder_q-layer.0": 3126.1296, "encoder_q-layer.1": 3206.6987, "encoder_q-layer.10": 3729.4004, "encoder_q-layer.11": 7813.3647, "encoder_q-layer.2": 3340.1875, "encoder_q-layer.3": 3256.7974, "encoder_q-layer.4": 3523.2249, "encoder_q-layer.5": 3217.6521, "encoder_q-layer.6": 2968.249, "encoder_q-layer.7": 2296.2573, "encoder_q-layer.8": 2210.7559, "encoder_q-layer.9": 2233.8506, "epoch": 0.06, "inbatch_neg_score": 0.6356, "inbatch_pos_score": 1.1191, "learning_rate": 4.35e-05, "loss": 4.5719, "norm_diff": 0.5857, "norm_loss": 0.0, "num_token_doc": 66.726, "num_token_overlap": 11.6118, "num_token_query": 31.1849, "num_token_union": 65.0174, "num_word_context": 202.2095, "num_word_doc": 49.7688, "num_word_query": 23.1579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5640.6633, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6182, "query_norm": 2.3204, "queue_k_norm": 1.7327, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1849, "sent_len_1": 66.726, "sent_len_max_0": 127.4175, "sent_len_max_1": 190.0325, "stdk": 0.0465, "stdq": 0.0454, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.529, "doc_norm": 1.7293, "encoder_q-embeddings": 1964.6766, "encoder_q-layer.0": 1361.6383, "encoder_q-layer.1": 1379.3491, "encoder_q-layer.10": 2946.71, "encoder_q-layer.11": 6764.7568, "encoder_q-layer.2": 1617.8524, "encoder_q-layer.3": 1724.8793, "encoder_q-layer.4": 1830.4296, "encoder_q-layer.5": 1833.0968, "encoder_q-layer.6": 1883.3798, "encoder_q-layer.7": 2372.1323, "encoder_q-layer.8": 2720.1177, "encoder_q-layer.9": 2452.0037, "epoch": 0.06, "inbatch_neg_score": 0.5556, "inbatch_pos_score": 1.0742, "learning_rate": 4.4000000000000006e-05, "loss": 4.529, "norm_diff": 0.6053, "norm_loss": 0.0, "num_token_doc": 66.9261, "num_token_overlap": 11.705, "num_token_query": 31.5898, "num_token_union": 65.3397, "num_word_context": 202.768, "num_word_doc": 49.9752, "num_word_query": 23.4734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4105.533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5391, "query_norm": 2.3346, "queue_k_norm": 1.7256, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5898, "sent_len_1": 66.9261, "sent_len_max_0": 127.5487, "sent_len_max_1": 189.5188, "stdk": 0.0467, "stdq": 0.0474, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.5312, "doc_norm": 1.7158, "encoder_q-embeddings": 2150.1873, "encoder_q-layer.0": 1469.2079, "encoder_q-layer.1": 1410.7559, "encoder_q-layer.10": 2897.1572, "encoder_q-layer.11": 5992.7188, "encoder_q-layer.2": 1591.0237, "encoder_q-layer.3": 1712.1682, "encoder_q-layer.4": 1675.6362, "encoder_q-layer.5": 1666.6902, "encoder_q-layer.6": 1824.5792, "encoder_q-layer.7": 2038.6881, "encoder_q-layer.8": 2493.0347, "encoder_q-layer.9": 2385.8516, "epoch": 0.06, "inbatch_neg_score": 0.4719, "inbatch_pos_score": 0.9678, "learning_rate": 4.4500000000000004e-05, "loss": 4.5312, "norm_diff": 0.6367, "norm_loss": 0.0, "num_token_doc": 66.7033, "num_token_overlap": 11.6218, "num_token_query": 31.4283, "num_token_union": 65.1674, "num_word_context": 202.3987, "num_word_doc": 49.8429, "num_word_query": 23.3556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3676.123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4575, "query_norm": 2.3526, "queue_k_norm": 1.7173, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4283, "sent_len_1": 66.7033, "sent_len_max_0": 127.6175, "sent_len_max_1": 189.28, "stdk": 0.0466, "stdq": 0.0454, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.4762, "doc_norm": 1.7072, "encoder_q-embeddings": 2399.6873, "encoder_q-layer.0": 1752.1361, "encoder_q-layer.1": 1685.032, "encoder_q-layer.10": 2663.1599, "encoder_q-layer.11": 5629.8955, "encoder_q-layer.2": 1782.0425, "encoder_q-layer.3": 1877.9161, "encoder_q-layer.4": 1919.2383, "encoder_q-layer.5": 1920.9037, "encoder_q-layer.6": 2087.0127, "encoder_q-layer.7": 2411.7263, "encoder_q-layer.8": 2759.8525, "encoder_q-layer.9": 2280.3682, "epoch": 0.06, "inbatch_neg_score": 0.4114, "inbatch_pos_score": 0.9229, "learning_rate": 4.5e-05, "loss": 4.4762, "norm_diff": 0.7373, "norm_loss": 0.0, "num_token_doc": 66.6512, "num_token_overlap": 11.7285, "num_token_query": 31.4868, "num_token_union": 65.0896, "num_word_context": 201.8614, "num_word_doc": 49.7115, "num_word_query": 23.3792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3863.4287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3967, "query_norm": 2.4445, "queue_k_norm": 1.7062, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4868, "sent_len_1": 66.6512, "sent_len_max_0": 127.425, "sent_len_max_1": 189.5437, "stdk": 0.0466, "stdq": 0.0468, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.4514, "doc_norm": 1.6975, "encoder_q-embeddings": 1953.7513, "encoder_q-layer.0": 1401.5743, "encoder_q-layer.1": 1430.6761, "encoder_q-layer.10": 3094.7461, "encoder_q-layer.11": 6180.6167, "encoder_q-layer.2": 1667.8892, "encoder_q-layer.3": 1680.2025, "encoder_q-layer.4": 1713.1083, "encoder_q-layer.5": 1701.0765, "encoder_q-layer.6": 1721.7271, "encoder_q-layer.7": 1749.2676, "encoder_q-layer.8": 1992.193, "encoder_q-layer.9": 1898.3019, "epoch": 0.06, "inbatch_neg_score": 0.4193, "inbatch_pos_score": 0.939, "learning_rate": 4.55e-05, "loss": 4.4514, "norm_diff": 0.7387, "norm_loss": 0.0, "num_token_doc": 66.6463, "num_token_overlap": 11.63, "num_token_query": 31.3052, "num_token_union": 65.0484, "num_word_context": 202.5849, "num_word_doc": 49.6888, "num_word_query": 23.2288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3654.2846, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 2.4362, "queue_k_norm": 1.6926, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3052, "sent_len_1": 66.6463, "sent_len_max_0": 127.285, "sent_len_max_1": 191.53, "stdk": 0.0466, "stdq": 0.0453, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.4341, "doc_norm": 1.6817, "encoder_q-embeddings": 9527.6172, "encoder_q-layer.0": 8748.6367, "encoder_q-layer.1": 7351.1362, "encoder_q-layer.10": 2830.0081, "encoder_q-layer.11": 6494.1064, "encoder_q-layer.2": 8270.041, "encoder_q-layer.3": 7387.7344, "encoder_q-layer.4": 6908.1162, "encoder_q-layer.5": 6282.0659, "encoder_q-layer.6": 5002.2285, "encoder_q-layer.7": 2805.7498, "encoder_q-layer.8": 1835.9714, "encoder_q-layer.9": 1914.1659, "epoch": 0.06, "inbatch_neg_score": 0.5049, "inbatch_pos_score": 1.0176, "learning_rate": 4.600000000000001e-05, "loss": 4.4341, "norm_diff": 0.8237, "norm_loss": 0.0, "num_token_doc": 66.4867, "num_token_overlap": 11.6796, "num_token_query": 31.3596, "num_token_union": 64.951, "num_word_context": 202.153, "num_word_doc": 49.6619, "num_word_query": 23.2776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9741.2244, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4846, "query_norm": 2.5054, "queue_k_norm": 1.6837, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3596, "sent_len_1": 66.4867, "sent_len_max_0": 127.365, "sent_len_max_1": 187.5, "stdk": 0.0464, "stdq": 0.046, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.4441, "doc_norm": 1.6773, "encoder_q-embeddings": 3626.7629, "encoder_q-layer.0": 2808.6179, "encoder_q-layer.1": 2998.1814, "encoder_q-layer.10": 2385.3855, "encoder_q-layer.11": 5707.9688, "encoder_q-layer.2": 3677.9636, "encoder_q-layer.3": 2982.5591, "encoder_q-layer.4": 2691.4243, "encoder_q-layer.5": 2557.2363, "encoder_q-layer.6": 2115.9307, "encoder_q-layer.7": 1888.632, "encoder_q-layer.8": 2023.2754, "encoder_q-layer.9": 1937.5959, "epoch": 0.06, "inbatch_neg_score": 0.5257, "inbatch_pos_score": 1.0078, "learning_rate": 4.6500000000000005e-05, "loss": 4.4441, "norm_diff": 0.754, "norm_loss": 0.0, "num_token_doc": 66.637, "num_token_overlap": 11.6342, "num_token_query": 31.2428, "num_token_union": 65.0076, "num_word_context": 201.9154, "num_word_doc": 49.7887, "num_word_query": 23.1944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4631.0883, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5059, "query_norm": 2.4313, "queue_k_norm": 1.6734, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2428, "sent_len_1": 66.637, "sent_len_max_0": 127.5088, "sent_len_max_1": 189.7038, "stdk": 0.0465, "stdq": 0.0445, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.4313, "doc_norm": 1.667, "encoder_q-embeddings": 3350.7356, "encoder_q-layer.0": 2475.6287, "encoder_q-layer.1": 2710.0134, "encoder_q-layer.10": 3119.4478, "encoder_q-layer.11": 7635.5405, "encoder_q-layer.2": 3083.2253, "encoder_q-layer.3": 3006.623, "encoder_q-layer.4": 2906.1128, "encoder_q-layer.5": 2764.6899, "encoder_q-layer.6": 2457.8496, "encoder_q-layer.7": 1949.0359, "encoder_q-layer.8": 1969.7102, "encoder_q-layer.9": 2056.0354, "epoch": 0.06, "inbatch_neg_score": 0.5557, "inbatch_pos_score": 1.0566, "learning_rate": 4.7e-05, "loss": 4.4313, "norm_diff": 0.7038, "norm_loss": 0.0, "num_token_doc": 66.7932, "num_token_overlap": 11.6904, "num_token_query": 31.3096, "num_token_union": 65.0972, "num_word_context": 201.8781, "num_word_doc": 49.8336, "num_word_query": 23.2706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5033.4321, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5366, "query_norm": 2.3708, "queue_k_norm": 1.664, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3096, "sent_len_1": 66.7932, "sent_len_max_0": 127.4375, "sent_len_max_1": 190.2988, "stdk": 0.0464, "stdq": 0.0454, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.4231, "doc_norm": 1.6519, "encoder_q-embeddings": 1670.226, "encoder_q-layer.0": 1112.7418, "encoder_q-layer.1": 1136.8669, "encoder_q-layer.10": 2490.1643, "encoder_q-layer.11": 5685.3496, "encoder_q-layer.2": 1289.4893, "encoder_q-layer.3": 1362.4189, "encoder_q-layer.4": 1448.5972, "encoder_q-layer.5": 1474.8135, "encoder_q-layer.6": 1770.4636, "encoder_q-layer.7": 1892.5605, "encoder_q-layer.8": 2207.2231, "encoder_q-layer.9": 2122.9294, "epoch": 0.06, "inbatch_neg_score": 0.6052, "inbatch_pos_score": 1.1152, "learning_rate": 4.75e-05, "loss": 4.4231, "norm_diff": 0.6744, "norm_loss": 0.0, "num_token_doc": 66.6987, "num_token_overlap": 11.649, "num_token_query": 31.2177, "num_token_union": 64.9311, "num_word_context": 201.9162, "num_word_doc": 49.7438, "num_word_query": 23.158, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3372.0276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5894, "query_norm": 2.3263, "queue_k_norm": 1.6565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2177, "sent_len_1": 66.6987, "sent_len_max_0": 127.4813, "sent_len_max_1": 191.765, "stdk": 0.0462, "stdq": 0.045, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.4269, "doc_norm": 1.6552, "encoder_q-embeddings": 4929.9893, "encoder_q-layer.0": 3677.0781, "encoder_q-layer.1": 3677.521, "encoder_q-layer.10": 2650.8418, "encoder_q-layer.11": 6345.666, "encoder_q-layer.2": 4152.7539, "encoder_q-layer.3": 3739.0078, "encoder_q-layer.4": 3163.5723, "encoder_q-layer.5": 2905.5022, "encoder_q-layer.6": 2650.2498, "encoder_q-layer.7": 2110.9653, "encoder_q-layer.8": 2232.1294, "encoder_q-layer.9": 2049.7981, "epoch": 0.06, "inbatch_neg_score": 0.61, "inbatch_pos_score": 1.0889, "learning_rate": 4.8e-05, "loss": 4.4269, "norm_diff": 0.5887, "norm_loss": 0.0, "num_token_doc": 66.7586, "num_token_overlap": 11.6499, "num_token_query": 31.3082, "num_token_union": 65.0815, "num_word_context": 202.1221, "num_word_doc": 49.7942, "num_word_query": 23.2116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5421.3707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5928, "query_norm": 2.2439, "queue_k_norm": 1.6508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3082, "sent_len_1": 66.7586, "sent_len_max_0": 127.6312, "sent_len_max_1": 190.4412, "stdk": 0.0464, "stdq": 0.0433, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.3959, "doc_norm": 1.6411, "encoder_q-embeddings": 2299.9658, "encoder_q-layer.0": 1684.3655, "encoder_q-layer.1": 1885.9402, "encoder_q-layer.10": 4212.3398, "encoder_q-layer.11": 9934.4004, "encoder_q-layer.2": 2189.5247, "encoder_q-layer.3": 2393.4736, "encoder_q-layer.4": 2447.9189, "encoder_q-layer.5": 2543.6809, "encoder_q-layer.6": 2318.4697, "encoder_q-layer.7": 2009.3871, "encoder_q-layer.8": 2158.8406, "encoder_q-layer.9": 2064.2634, "epoch": 0.06, "inbatch_neg_score": 0.623, "inbatch_pos_score": 1.127, "learning_rate": 4.85e-05, "loss": 4.3959, "norm_diff": 0.6336, "norm_loss": 0.0, "num_token_doc": 66.6465, "num_token_overlap": 11.6382, "num_token_query": 31.3555, "num_token_union": 65.0857, "num_word_context": 202.1732, "num_word_doc": 49.7287, "num_word_query": 23.2771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5325.3538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 2.2747, "queue_k_norm": 1.6452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3555, "sent_len_1": 66.6465, "sent_len_max_0": 127.6238, "sent_len_max_1": 189.69, "stdk": 0.0461, "stdq": 0.044, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.39, "doc_norm": 1.6428, "encoder_q-embeddings": 2046.9078, "encoder_q-layer.0": 1452.9198, "encoder_q-layer.1": 1449.9126, "encoder_q-layer.10": 2113.0283, "encoder_q-layer.11": 5623.3477, "encoder_q-layer.2": 1634.4072, "encoder_q-layer.3": 1703.3014, "encoder_q-layer.4": 1683.9949, "encoder_q-layer.5": 1633.8914, "encoder_q-layer.6": 1747.8776, "encoder_q-layer.7": 1756.3488, "encoder_q-layer.8": 1993.3519, "encoder_q-layer.9": 1860.561, "epoch": 0.06, "inbatch_neg_score": 0.6302, "inbatch_pos_score": 1.1465, "learning_rate": 4.9e-05, "loss": 4.39, "norm_diff": 0.6022, "norm_loss": 0.0, "num_token_doc": 66.9605, "num_token_overlap": 11.6776, "num_token_query": 31.3643, "num_token_union": 65.1985, "num_word_context": 202.4703, "num_word_doc": 49.9719, "num_word_query": 23.2988, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3419.5235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6191, "query_norm": 2.245, "queue_k_norm": 1.6413, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3643, "sent_len_1": 66.9605, "sent_len_max_0": 127.6937, "sent_len_max_1": 189.0712, "stdk": 0.0463, "stdq": 0.0439, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.3669, "doc_norm": 1.6414, "encoder_q-embeddings": 2313.5981, "encoder_q-layer.0": 1756.4829, "encoder_q-layer.1": 1898.3015, "encoder_q-layer.10": 2136.7522, "encoder_q-layer.11": 5281.1562, "encoder_q-layer.2": 2117.4287, "encoder_q-layer.3": 2156.6318, "encoder_q-layer.4": 1975.6819, "encoder_q-layer.5": 1714.6244, "encoder_q-layer.6": 1736.9437, "encoder_q-layer.7": 1583.7345, "encoder_q-layer.8": 1683.9541, "encoder_q-layer.9": 1559.5676, "epoch": 0.06, "inbatch_neg_score": 0.6162, "inbatch_pos_score": 1.1387, "learning_rate": 4.9500000000000004e-05, "loss": 4.3669, "norm_diff": 0.5968, "norm_loss": 0.0, "num_token_doc": 66.7103, "num_token_overlap": 11.6431, "num_token_query": 31.3148, "num_token_union": 65.1263, "num_word_context": 202.4812, "num_word_doc": 49.7983, "num_word_query": 23.245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3518.9788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6035, "query_norm": 2.2382, "queue_k_norm": 1.6378, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3148, "sent_len_1": 66.7103, "sent_len_max_0": 127.4188, "sent_len_max_1": 190.03, "stdk": 0.0464, "stdq": 0.0429, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.3816, "doc_norm": 1.6306, "encoder_q-embeddings": 24201.043, "encoder_q-layer.0": 17177.9902, "encoder_q-layer.1": 15764.5146, "encoder_q-layer.10": 2233.6436, "encoder_q-layer.11": 5685.5791, "encoder_q-layer.2": 17216.082, "encoder_q-layer.3": 16085.5029, "encoder_q-layer.4": 16611.8516, "encoder_q-layer.5": 12120.1504, "encoder_q-layer.6": 10651.6494, "encoder_q-layer.7": 4817.6118, "encoder_q-layer.8": 2493.469, "encoder_q-layer.9": 2019.4668, "epoch": 0.07, "inbatch_neg_score": 0.6543, "inbatch_pos_score": 1.1895, "learning_rate": 5e-05, "loss": 4.3816, "norm_diff": 0.6358, "norm_loss": 0.0, "num_token_doc": 66.7599, "num_token_overlap": 11.6471, "num_token_query": 31.2679, "num_token_union": 65.0495, "num_word_context": 202.2333, "num_word_doc": 49.8391, "num_word_query": 23.2193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20774.0617, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.6396, "query_norm": 2.2664, "queue_k_norm": 1.6335, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2679, "sent_len_1": 66.7599, "sent_len_max_0": 127.4125, "sent_len_max_1": 188.0, "stdk": 0.0461, "stdq": 0.0445, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 28.6463, "dev_samples_per_second": 2.234, "dev_steps_per_second": 0.035, "epoch": 0.07, "step": 10000, "test_accuracy": 90.869140625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5259673595428467, "test_doc_norm": 1.5578079223632812, "test_inbatch_neg_score": 0.9551270604133606, "test_inbatch_pos_score": 1.695020318031311, "test_loss": 0.5259673595428467, "test_loss_align": 4.080766201019287, "test_loss_unif": 3.2076163291931152, "test_loss_unif_q@queue": 3.2076163291931152, "test_norm_diff": 0.6835236549377441, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6359478831291199, "test_query_norm": 2.2413315773010254, "test_queue_k_norm": 1.6333190202713013, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03788496553897858, "test_stdq": 0.03877013921737671, "test_stdqueue_k": 0.046322938054800034, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.6463, "dev_samples_per_second": 2.234, "dev_steps_per_second": 0.035, "epoch": 0.07, "eval_beir-arguana_ndcg@10": 0.16343, "eval_beir-arguana_recall@10": 0.30299, "eval_beir-arguana_recall@100": 0.6522, "eval_beir-arguana_recall@20": 0.42959, "eval_beir-avg_ndcg@10": 0.1471036666666667, "eval_beir-avg_recall@10": 0.18493583333333333, "eval_beir-avg_recall@100": 0.34856750000000003, "eval_beir-avg_recall@20": 0.23090658333333333, "eval_beir-cqadupstack_ndcg@10": 0.050526666666666664, "eval_beir-cqadupstack_recall@10": 0.07955833333333333, "eval_beir-cqadupstack_recall@100": 0.194815, "eval_beir-cqadupstack_recall@20": 0.10647583333333333, "eval_beir-fiqa_ndcg@10": 0.07478, "eval_beir-fiqa_recall@10": 0.10171, "eval_beir-fiqa_recall@100": 0.22871, "eval_beir-fiqa_recall@20": 0.13331, "eval_beir-nfcorpus_ndcg@10": 0.1705, "eval_beir-nfcorpus_recall@10": 0.07308, "eval_beir-nfcorpus_recall@100": 0.18702, "eval_beir-nfcorpus_recall@20": 0.09709, "eval_beir-nq_ndcg@10": 0.06621, "eval_beir-nq_recall@10": 0.11134, "eval_beir-nq_recall@100": 0.29435, "eval_beir-nq_recall@20": 0.15996, "eval_beir-quora_ndcg@10": 0.12075, "eval_beir-quora_recall@10": 0.22013, "eval_beir-quora_recall@100": 0.52192, "eval_beir-quora_recall@20": 0.30013, "eval_beir-scidocs_ndcg@10": 0.06093, "eval_beir-scidocs_recall@10": 0.0677, "eval_beir-scidocs_recall@100": 0.19297, "eval_beir-scidocs_recall@20": 0.0957, "eval_beir-scifact_ndcg@10": 0.38656, "eval_beir-scifact_recall@10": 0.50789, "eval_beir-scifact_recall@100": 0.76411, "eval_beir-scifact_recall@20": 0.58733, "eval_beir-trec-covid_ndcg@10": 0.2902, "eval_beir-trec-covid_recall@10": 0.332, "eval_beir-trec-covid_recall@100": 0.217, "eval_beir-trec-covid_recall@20": 0.298, "eval_beir-webis-touche2020_ndcg@10": 0.08715, "eval_beir-webis-touche2020_recall@10": 0.05296, "eval_beir-webis-touche2020_recall@100": 0.23258, "eval_beir-webis-touche2020_recall@20": 0.10148, "eval_senteval-avg_sts": 0.6885531946835503, "eval_senteval-sickr_spearman": 0.6558896663101762, "eval_senteval-stsb_spearman": 0.7212167230569243, "step": 10000, "test_accuracy": 90.869140625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5259673595428467, "test_doc_norm": 1.5578079223632812, "test_inbatch_neg_score": 0.9551270604133606, "test_inbatch_pos_score": 1.695020318031311, "test_loss": 0.5259673595428467, "test_loss_align": 4.080766201019287, "test_loss_unif": 3.2076163291931152, "test_loss_unif_q@queue": 3.2076163291931152, "test_norm_diff": 0.6835236549377441, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6359478831291199, "test_query_norm": 2.2413315773010254, "test_queue_k_norm": 1.6333190202713013, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03788496553897858, "test_stdq": 0.03877013921737671, "test_stdqueue_k": 0.046322938054800034, "test_stdqueue_q": 0.0 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.3329, "doc_norm": 1.6329, "encoder_q-embeddings": 4155.4741, "encoder_q-layer.0": 2967.9153, "encoder_q-layer.1": 2858.5991, "encoder_q-layer.10": 2357.6675, "encoder_q-layer.11": 6260.3594, "encoder_q-layer.2": 2803.9529, "encoder_q-layer.3": 2834.6174, "encoder_q-layer.4": 3087.5779, "encoder_q-layer.5": 3540.8872, "encoder_q-layer.6": 4314.6929, "encoder_q-layer.7": 4289.458, "encoder_q-layer.8": 4041.3472, "encoder_q-layer.9": 2418.2268, "epoch": 0.07, "inbatch_neg_score": 0.6858, "inbatch_pos_score": 1.1729, "learning_rate": 4.994444444444445e-05, "loss": 4.3329, "norm_diff": 0.5431, "norm_loss": 0.0, "num_token_doc": 67.0257, "num_token_overlap": 11.7356, "num_token_query": 31.4931, "num_token_union": 65.2782, "num_word_context": 202.3216, "num_word_doc": 50.0273, "num_word_query": 23.4089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5393.6061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6704, "query_norm": 2.1759, "queue_k_norm": 1.6291, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4931, "sent_len_1": 67.0257, "sent_len_max_0": 127.3925, "sent_len_max_1": 188.5838, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.3396, "doc_norm": 1.6318, "encoder_q-embeddings": 3749.7371, "encoder_q-layer.0": 2566.5654, "encoder_q-layer.1": 2788.3979, "encoder_q-layer.10": 4432.5933, "encoder_q-layer.11": 13173.4697, "encoder_q-layer.2": 3196.981, "encoder_q-layer.3": 3409.9802, "encoder_q-layer.4": 3676.2883, "encoder_q-layer.5": 3779.7766, "encoder_q-layer.6": 4233.2847, "encoder_q-layer.7": 4112.3735, "encoder_q-layer.8": 4268.1602, "encoder_q-layer.9": 3810.4253, "epoch": 0.07, "inbatch_neg_score": 0.7001, "inbatch_pos_score": 1.2197, "learning_rate": 4.9888888888888894e-05, "loss": 4.3396, "norm_diff": 0.5308, "norm_loss": 0.0, "num_token_doc": 66.7089, "num_token_overlap": 11.7405, "num_token_query": 31.5021, "num_token_union": 65.1031, "num_word_context": 202.1877, "num_word_doc": 49.7937, "num_word_query": 23.4075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7369.8274, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6841, "query_norm": 2.1626, "queue_k_norm": 1.6284, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5021, "sent_len_1": 66.7089, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.7763, "stdk": 0.0464, "stdq": 0.0441, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.3382, "doc_norm": 1.6225, "encoder_q-embeddings": 8285.4961, "encoder_q-layer.0": 6631.8794, "encoder_q-layer.1": 6835.873, "encoder_q-layer.10": 3848.9585, "encoder_q-layer.11": 11003.6289, "encoder_q-layer.2": 7977.3887, "encoder_q-layer.3": 8495.6035, "encoder_q-layer.4": 8228.4883, "encoder_q-layer.5": 7386.6567, "encoder_q-layer.6": 7067.1895, "encoder_q-layer.7": 6118.0239, "encoder_q-layer.8": 5047.895, "encoder_q-layer.9": 3433.6602, "epoch": 0.07, "inbatch_neg_score": 0.6688, "inbatch_pos_score": 1.1855, "learning_rate": 4.9833333333333336e-05, "loss": 4.3382, "norm_diff": 0.437, "norm_loss": 0.0, "num_token_doc": 66.6757, "num_token_overlap": 11.6257, "num_token_query": 31.1726, "num_token_union": 64.9509, "num_word_context": 201.8225, "num_word_doc": 49.759, "num_word_query": 23.1388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10875.774, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6582, "query_norm": 2.0595, "queue_k_norm": 1.6237, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1726, "sent_len_1": 66.6757, "sent_len_max_0": 127.3487, "sent_len_max_1": 188.1225, "stdk": 0.0461, "stdq": 0.0422, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.4007, "doc_norm": 1.6224, "encoder_q-embeddings": 1416.8071, "encoder_q-layer.0": 1123.7063, "encoder_q-layer.1": 1066.7147, "encoder_q-layer.10": 505.6219, "encoder_q-layer.11": 1521.5532, "encoder_q-layer.2": 1235.036, "encoder_q-layer.3": 1006.4316, "encoder_q-layer.4": 962.9883, "encoder_q-layer.5": 764.6015, "encoder_q-layer.6": 637.9788, "encoder_q-layer.7": 550.8273, "encoder_q-layer.8": 480.1871, "encoder_q-layer.9": 405.3262, "epoch": 0.07, "inbatch_neg_score": 0.654, "inbatch_pos_score": 1.1641, "learning_rate": 4.977777777777778e-05, "loss": 4.4007, "norm_diff": 0.4307, "norm_loss": 0.0, "num_token_doc": 66.831, "num_token_overlap": 11.5826, "num_token_query": 31.2114, "num_token_union": 65.1145, "num_word_context": 202.0896, "num_word_doc": 49.84, "num_word_query": 23.1596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1503.8686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6421, "query_norm": 2.0531, "queue_k_norm": 1.6183, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2114, "sent_len_1": 66.831, "sent_len_max_0": 127.4225, "sent_len_max_1": 190.3375, "stdk": 0.0462, "stdq": 0.0442, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.4098, "doc_norm": 1.613, "encoder_q-embeddings": 1311.8408, "encoder_q-layer.0": 1009.589, "encoder_q-layer.1": 1074.0731, "encoder_q-layer.10": 535.4493, "encoder_q-layer.11": 1410.2966, "encoder_q-layer.2": 1209.6873, "encoder_q-layer.3": 1241.5452, "encoder_q-layer.4": 1252.3785, "encoder_q-layer.5": 1090.4901, "encoder_q-layer.6": 888.1592, "encoder_q-layer.7": 766.0557, "encoder_q-layer.8": 664.2144, "encoder_q-layer.9": 497.2742, "epoch": 0.07, "inbatch_neg_score": 0.614, "inbatch_pos_score": 1.1465, "learning_rate": 4.972222222222223e-05, "loss": 4.4098, "norm_diff": 0.4193, "norm_loss": 0.0, "num_token_doc": 66.8273, "num_token_overlap": 11.6244, "num_token_query": 31.2653, "num_token_union": 65.1146, "num_word_context": 202.3621, "num_word_doc": 49.9032, "num_word_query": 23.2248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1551.5518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.604, "query_norm": 2.0323, "queue_k_norm": 1.6173, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2653, "sent_len_1": 66.8273, "sent_len_max_0": 127.1737, "sent_len_max_1": 188.0387, "stdk": 0.0462, "stdq": 0.0444, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.385, "doc_norm": 1.6091, "encoder_q-embeddings": 1206.8979, "encoder_q-layer.0": 975.2697, "encoder_q-layer.1": 1053.6493, "encoder_q-layer.10": 470.1652, "encoder_q-layer.11": 1431.4014, "encoder_q-layer.2": 1267.0935, "encoder_q-layer.3": 1287.9055, "encoder_q-layer.4": 1370.4987, "encoder_q-layer.5": 1335.4186, "encoder_q-layer.6": 1322.1476, "encoder_q-layer.7": 1197.5344, "encoder_q-layer.8": 929.1882, "encoder_q-layer.9": 530.1111, "epoch": 0.07, "inbatch_neg_score": 0.5919, "inbatch_pos_score": 1.0889, "learning_rate": 4.966666666666667e-05, "loss": 4.385, "norm_diff": 0.3041, "norm_loss": 0.0, "num_token_doc": 66.7853, "num_token_overlap": 11.7142, "num_token_query": 31.511, "num_token_union": 65.1684, "num_word_context": 202.6433, "num_word_doc": 49.8313, "num_word_query": 23.3891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1680.02, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5825, "query_norm": 1.9133, "queue_k_norm": 1.6123, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.511, "sent_len_1": 66.7853, "sent_len_max_0": 127.5925, "sent_len_max_1": 188.95, "stdk": 0.0461, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.3674, "doc_norm": 1.6122, "encoder_q-embeddings": 1630.0718, "encoder_q-layer.0": 1311.2457, "encoder_q-layer.1": 1405.5348, "encoder_q-layer.10": 622.8403, "encoder_q-layer.11": 1968.6199, "encoder_q-layer.2": 1624.0127, "encoder_q-layer.3": 1682.0104, "encoder_q-layer.4": 1820.8953, "encoder_q-layer.5": 1718.1573, "encoder_q-layer.6": 1733.8108, "encoder_q-layer.7": 1514.9221, "encoder_q-layer.8": 893.709, "encoder_q-layer.9": 549.7943, "epoch": 0.07, "inbatch_neg_score": 0.5909, "inbatch_pos_score": 1.0898, "learning_rate": 4.961111111111111e-05, "loss": 4.3674, "norm_diff": 0.2721, "norm_loss": 0.0, "num_token_doc": 66.7309, "num_token_overlap": 11.6841, "num_token_query": 31.3502, "num_token_union": 65.0667, "num_word_context": 202.2479, "num_word_doc": 49.8364, "num_word_query": 23.2693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2227.9778, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5835, "query_norm": 1.8844, "queue_k_norm": 1.611, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3502, "sent_len_1": 66.7309, "sent_len_max_0": 127.6075, "sent_len_max_1": 187.2675, "stdk": 0.0464, "stdq": 0.0439, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.359, "doc_norm": 1.6173, "encoder_q-embeddings": 998.3468, "encoder_q-layer.0": 757.34, "encoder_q-layer.1": 833.3889, "encoder_q-layer.10": 488.0684, "encoder_q-layer.11": 1395.05, "encoder_q-layer.2": 965.7157, "encoder_q-layer.3": 957.9444, "encoder_q-layer.4": 1015.6362, "encoder_q-layer.5": 1057.5303, "encoder_q-layer.6": 1048.8678, "encoder_q-layer.7": 916.3969, "encoder_q-layer.8": 675.7826, "encoder_q-layer.9": 560.3661, "epoch": 0.07, "inbatch_neg_score": 0.5365, "inbatch_pos_score": 1.0322, "learning_rate": 4.955555555555556e-05, "loss": 4.359, "norm_diff": 0.2405, "norm_loss": 0.0, "num_token_doc": 66.6838, "num_token_overlap": 11.6436, "num_token_query": 31.3103, "num_token_union": 65.0997, "num_word_context": 202.3116, "num_word_doc": 49.8039, "num_word_query": 23.2758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1380.7231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.8578, "queue_k_norm": 1.6115, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3103, "sent_len_1": 66.6838, "sent_len_max_0": 127.5413, "sent_len_max_1": 187.9087, "stdk": 0.0467, "stdq": 0.0427, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.3554, "doc_norm": 1.6118, "encoder_q-embeddings": 4703.3555, "encoder_q-layer.0": 3741.9922, "encoder_q-layer.1": 4358.1919, "encoder_q-layer.10": 563.8103, "encoder_q-layer.11": 1399.4318, "encoder_q-layer.2": 4563.5752, "encoder_q-layer.3": 4631.1035, "encoder_q-layer.4": 4894.7676, "encoder_q-layer.5": 5264.7944, "encoder_q-layer.6": 5384.8325, "encoder_q-layer.7": 5117.9448, "encoder_q-layer.8": 3499.8501, "encoder_q-layer.9": 1264.8511, "epoch": 0.07, "inbatch_neg_score": 0.5227, "inbatch_pos_score": 1.002, "learning_rate": 4.9500000000000004e-05, "loss": 4.3554, "norm_diff": 0.2431, "norm_loss": 0.0, "num_token_doc": 66.6739, "num_token_overlap": 11.7152, "num_token_query": 31.32, "num_token_union": 64.9635, "num_word_context": 202.0258, "num_word_doc": 49.7479, "num_word_query": 23.2512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6127.4026, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5156, "query_norm": 1.8548, "queue_k_norm": 1.6105, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.32, "sent_len_1": 66.6739, "sent_len_max_0": 127.4387, "sent_len_max_1": 188.64, "stdk": 0.0466, "stdq": 0.0415, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.3969, "doc_norm": 1.5995, "encoder_q-embeddings": 3763.5801, "encoder_q-layer.0": 3116.8337, "encoder_q-layer.1": 3496.0723, "encoder_q-layer.10": 435.125, "encoder_q-layer.11": 1074.5367, "encoder_q-layer.2": 3541.8069, "encoder_q-layer.3": 3531.7847, "encoder_q-layer.4": 3603.2288, "encoder_q-layer.5": 2937.2029, "encoder_q-layer.6": 2610.1223, "encoder_q-layer.7": 2051.1592, "encoder_q-layer.8": 1437.2377, "encoder_q-layer.9": 646.1042, "epoch": 0.07, "inbatch_neg_score": 0.5679, "inbatch_pos_score": 1.0781, "learning_rate": 4.9444444444444446e-05, "loss": 4.3969, "norm_diff": 0.2533, "norm_loss": 0.0, "num_token_doc": 66.822, "num_token_overlap": 11.7035, "num_token_query": 31.4656, "num_token_union": 65.1719, "num_word_context": 202.267, "num_word_doc": 49.8236, "num_word_query": 23.3732, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4182.9919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5581, "query_norm": 1.8528, "queue_k_norm": 1.6062, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4656, "sent_len_1": 66.822, "sent_len_max_0": 127.5863, "sent_len_max_1": 190.01, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.3824, "doc_norm": 1.6067, "encoder_q-embeddings": 1066.4858, "encoder_q-layer.0": 814.0846, "encoder_q-layer.1": 913.1208, "encoder_q-layer.10": 514.6866, "encoder_q-layer.11": 1388.1841, "encoder_q-layer.2": 1048.1659, "encoder_q-layer.3": 1072.2872, "encoder_q-layer.4": 1250.92, "encoder_q-layer.5": 1107.4342, "encoder_q-layer.6": 1116.4084, "encoder_q-layer.7": 1044.825, "encoder_q-layer.8": 792.5041, "encoder_q-layer.9": 580.018, "epoch": 0.07, "inbatch_neg_score": 0.4221, "inbatch_pos_score": 0.9448, "learning_rate": 4.938888888888889e-05, "loss": 4.3824, "norm_diff": 0.1227, "norm_loss": 0.0, "num_token_doc": 66.6633, "num_token_overlap": 11.6486, "num_token_query": 31.2847, "num_token_union": 64.9896, "num_word_context": 202.0134, "num_word_doc": 49.7035, "num_word_query": 23.2292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1453.619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.417, "query_norm": 1.7294, "queue_k_norm": 1.6076, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2847, "sent_len_1": 66.6633, "sent_len_max_0": 127.4975, "sent_len_max_1": 191.31, "stdk": 0.0467, "stdq": 0.0405, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 4.3305, "doc_norm": 1.604, "encoder_q-embeddings": 2528.8828, "encoder_q-layer.0": 1943.3435, "encoder_q-layer.1": 2186.9045, "encoder_q-layer.10": 542.9912, "encoder_q-layer.11": 1323.0875, "encoder_q-layer.2": 2625.1108, "encoder_q-layer.3": 2885.2832, "encoder_q-layer.4": 3197.7527, "encoder_q-layer.5": 2985.9746, "encoder_q-layer.6": 2635.5229, "encoder_q-layer.7": 2117.9583, "encoder_q-layer.8": 1580.1085, "encoder_q-layer.9": 687.138, "epoch": 0.07, "inbatch_neg_score": 0.4976, "inbatch_pos_score": 1.0029, "learning_rate": 4.933333333333334e-05, "loss": 4.3305, "norm_diff": 0.18, "norm_loss": 0.0, "num_token_doc": 66.9855, "num_token_overlap": 11.6655, "num_token_query": 31.3979, "num_token_union": 65.2826, "num_word_context": 202.3615, "num_word_doc": 50.0, "num_word_query": 23.3334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3316.7243, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4915, "query_norm": 1.784, "queue_k_norm": 1.6096, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3979, "sent_len_1": 66.9855, "sent_len_max_0": 127.555, "sent_len_max_1": 188.6087, "stdk": 0.0465, "stdq": 0.0411, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3171, "doc_norm": 1.6095, "encoder_q-embeddings": 3978.2087, "encoder_q-layer.0": 3070.5625, "encoder_q-layer.1": 3007.1511, "encoder_q-layer.10": 481.6933, "encoder_q-layer.11": 1489.7177, "encoder_q-layer.2": 4087.0061, "encoder_q-layer.3": 4541.2271, "encoder_q-layer.4": 4397.0874, "encoder_q-layer.5": 3163.0723, "encoder_q-layer.6": 2825.7639, "encoder_q-layer.7": 2302.8801, "encoder_q-layer.8": 1313.4877, "encoder_q-layer.9": 720.269, "epoch": 0.07, "inbatch_neg_score": 0.4851, "inbatch_pos_score": 0.9897, "learning_rate": 4.927777777777778e-05, "loss": 4.3171, "norm_diff": 0.1988, "norm_loss": 0.0, "num_token_doc": 66.84, "num_token_overlap": 11.7085, "num_token_query": 31.3498, "num_token_union": 65.1059, "num_word_context": 202.3322, "num_word_doc": 49.9018, "num_word_query": 23.3047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4668.9539, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4778, "query_norm": 1.8082, "queue_k_norm": 1.6077, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3498, "sent_len_1": 66.84, "sent_len_max_0": 127.3388, "sent_len_max_1": 190.6213, "stdk": 0.0468, "stdq": 0.0416, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3733, "doc_norm": 1.6047, "encoder_q-embeddings": 2871.1191, "encoder_q-layer.0": 2084.261, "encoder_q-layer.1": 2306.9131, "encoder_q-layer.10": 465.8745, "encoder_q-layer.11": 1307.277, "encoder_q-layer.2": 2533.4749, "encoder_q-layer.3": 2492.7217, "encoder_q-layer.4": 2584.8008, "encoder_q-layer.5": 2383.3494, "encoder_q-layer.6": 2734.0044, "encoder_q-layer.7": 2908.4771, "encoder_q-layer.8": 1713.0995, "encoder_q-layer.9": 681.2682, "epoch": 0.07, "inbatch_neg_score": 0.5332, "inbatch_pos_score": 1.0439, "learning_rate": 4.922222222222222e-05, "loss": 4.3733, "norm_diff": 0.2554, "norm_loss": 0.0, "num_token_doc": 66.6836, "num_token_overlap": 11.6035, "num_token_query": 31.2541, "num_token_union": 65.0635, "num_word_context": 202.1398, "num_word_doc": 49.7645, "num_word_query": 23.2132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3361.4387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5249, "query_norm": 1.8601, "queue_k_norm": 1.609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2541, "sent_len_1": 66.6836, "sent_len_max_0": 127.5175, "sent_len_max_1": 188.2713, "stdk": 0.0466, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.3308, "doc_norm": 1.6125, "encoder_q-embeddings": 905.2542, "encoder_q-layer.0": 676.5231, "encoder_q-layer.1": 833.3872, "encoder_q-layer.10": 554.3226, "encoder_q-layer.11": 1529.2565, "encoder_q-layer.2": 1014.9458, "encoder_q-layer.3": 1136.5532, "encoder_q-layer.4": 1245.4542, "encoder_q-layer.5": 1049.1584, "encoder_q-layer.6": 1099.6562, "encoder_q-layer.7": 1170.566, "encoder_q-layer.8": 889.9708, "encoder_q-layer.9": 489.5147, "epoch": 0.07, "inbatch_neg_score": 0.4882, "inbatch_pos_score": 0.9932, "learning_rate": 4.9166666666666665e-05, "loss": 4.3308, "norm_diff": 0.2098, "norm_loss": 0.0, "num_token_doc": 66.8512, "num_token_overlap": 11.6793, "num_token_query": 31.4441, "num_token_union": 65.1647, "num_word_context": 202.1781, "num_word_doc": 49.8183, "num_word_query": 23.3479, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1464.0086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4824, "query_norm": 1.8222, "queue_k_norm": 1.6095, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4441, "sent_len_1": 66.8512, "sent_len_max_0": 127.4475, "sent_len_max_1": 191.2275, "stdk": 0.047, "stdq": 0.0401, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.3394, "doc_norm": 1.6082, "encoder_q-embeddings": 1137.0769, "encoder_q-layer.0": 1000.5662, "encoder_q-layer.1": 989.1314, "encoder_q-layer.10": 243.9117, "encoder_q-layer.11": 531.6695, "encoder_q-layer.2": 1031.9939, "encoder_q-layer.3": 989.5181, "encoder_q-layer.4": 1000.0619, "encoder_q-layer.5": 783.0114, "encoder_q-layer.6": 795.8432, "encoder_q-layer.7": 730.6517, "encoder_q-layer.8": 470.2646, "encoder_q-layer.9": 227.7106, "epoch": 0.08, "inbatch_neg_score": 0.5091, "inbatch_pos_score": 1.0088, "learning_rate": 4.9111111111111114e-05, "loss": 4.3394, "norm_diff": 0.2584, "norm_loss": 0.0, "num_token_doc": 66.7547, "num_token_overlap": 11.6396, "num_token_query": 31.3314, "num_token_union": 65.0864, "num_word_context": 202.5449, "num_word_doc": 49.8307, "num_word_query": 23.257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1252.6075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5024, "query_norm": 1.8666, "queue_k_norm": 1.6072, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3314, "sent_len_1": 66.7547, "sent_len_max_0": 127.505, "sent_len_max_1": 190.3663, "stdk": 0.0467, "stdq": 0.0412, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.3436, "doc_norm": 1.6121, "encoder_q-embeddings": 1183.606, "encoder_q-layer.0": 971.3836, "encoder_q-layer.1": 1159.1564, "encoder_q-layer.10": 267.1852, "encoder_q-layer.11": 653.8042, "encoder_q-layer.2": 1272.238, "encoder_q-layer.3": 1393.0031, "encoder_q-layer.4": 1521.8369, "encoder_q-layer.5": 1524.2495, "encoder_q-layer.6": 1500.7439, "encoder_q-layer.7": 1218.7205, "encoder_q-layer.8": 711.1906, "encoder_q-layer.9": 283.8545, "epoch": 0.08, "inbatch_neg_score": 0.4833, "inbatch_pos_score": 0.9756, "learning_rate": 4.905555555555556e-05, "loss": 4.3436, "norm_diff": 0.29, "norm_loss": 0.0, "num_token_doc": 66.8023, "num_token_overlap": 11.6182, "num_token_query": 31.2574, "num_token_union": 65.1235, "num_word_context": 202.5592, "num_word_doc": 49.8525, "num_word_query": 23.2006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1688.4241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4761, "query_norm": 1.9021, "queue_k_norm": 1.6091, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2574, "sent_len_1": 66.8023, "sent_len_max_0": 127.6338, "sent_len_max_1": 187.2663, "stdk": 0.0468, "stdq": 0.0419, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.3452, "doc_norm": 1.6179, "encoder_q-embeddings": 696.4046, "encoder_q-layer.0": 504.3609, "encoder_q-layer.1": 648.1385, "encoder_q-layer.10": 252.5876, "encoder_q-layer.11": 551.7421, "encoder_q-layer.2": 809.6393, "encoder_q-layer.3": 876.6669, "encoder_q-layer.4": 791.5293, "encoder_q-layer.5": 539.8204, "encoder_q-layer.6": 614.4625, "encoder_q-layer.7": 501.0052, "encoder_q-layer.8": 264.6649, "encoder_q-layer.9": 208.3771, "epoch": 0.08, "inbatch_neg_score": 0.4336, "inbatch_pos_score": 0.918, "learning_rate": 4.9e-05, "loss": 4.3452, "norm_diff": 0.229, "norm_loss": 0.0, "num_token_doc": 66.4883, "num_token_overlap": 11.6326, "num_token_query": 31.2587, "num_token_union": 64.9194, "num_word_context": 202.1398, "num_word_doc": 49.6348, "num_word_query": 23.2029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 908.4079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4275, "query_norm": 1.8469, "queue_k_norm": 1.6128, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2587, "sent_len_1": 66.4883, "sent_len_max_0": 127.3363, "sent_len_max_1": 187.2512, "stdk": 0.047, "stdq": 0.0401, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.3275, "doc_norm": 1.6169, "encoder_q-embeddings": 1310.3387, "encoder_q-layer.0": 928.4818, "encoder_q-layer.1": 1023.9304, "encoder_q-layer.10": 230.1987, "encoder_q-layer.11": 604.0643, "encoder_q-layer.2": 1174.4473, "encoder_q-layer.3": 1365.0867, "encoder_q-layer.4": 1416.3025, "encoder_q-layer.5": 1409.3477, "encoder_q-layer.6": 1487.1256, "encoder_q-layer.7": 1605.2175, "encoder_q-layer.8": 1236.4615, "encoder_q-layer.9": 467.735, "epoch": 0.08, "inbatch_neg_score": 0.5015, "inbatch_pos_score": 1.0205, "learning_rate": 4.894444444444445e-05, "loss": 4.3275, "norm_diff": 0.2689, "norm_loss": 0.0, "num_token_doc": 66.8967, "num_token_overlap": 11.6447, "num_token_query": 31.2219, "num_token_union": 65.1611, "num_word_context": 202.3285, "num_word_doc": 49.9327, "num_word_query": 23.1688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1709.7806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4949, "query_norm": 1.8859, "queue_k_norm": 1.618, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2219, "sent_len_1": 66.8967, "sent_len_max_0": 127.4963, "sent_len_max_1": 191.2325, "stdk": 0.0469, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.3216, "doc_norm": 1.6159, "encoder_q-embeddings": 1094.0931, "encoder_q-layer.0": 818.5135, "encoder_q-layer.1": 882.4736, "encoder_q-layer.10": 252.8212, "encoder_q-layer.11": 609.4211, "encoder_q-layer.2": 1040.1644, "encoder_q-layer.3": 1167.6294, "encoder_q-layer.4": 1093.4882, "encoder_q-layer.5": 1083.9978, "encoder_q-layer.6": 902.7798, "encoder_q-layer.7": 705.275, "encoder_q-layer.8": 395.6072, "encoder_q-layer.9": 226.9921, "epoch": 0.08, "inbatch_neg_score": 0.5472, "inbatch_pos_score": 1.0625, "learning_rate": 4.888888888888889e-05, "loss": 4.3216, "norm_diff": 0.3057, "norm_loss": 0.0, "num_token_doc": 66.7669, "num_token_overlap": 11.686, "num_token_query": 31.4104, "num_token_union": 65.1519, "num_word_context": 202.5199, "num_word_doc": 49.8477, "num_word_query": 23.3414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1280.6868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.54, "query_norm": 1.9217, "queue_k_norm": 1.6168, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4104, "sent_len_1": 66.7669, "sent_len_max_0": 127.5, "sent_len_max_1": 190.1513, "stdk": 0.0468, "stdq": 0.043, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.31, "doc_norm": 1.62, "encoder_q-embeddings": 12700.2705, "encoder_q-layer.0": 9623.2109, "encoder_q-layer.1": 11147.166, "encoder_q-layer.10": 243.7425, "encoder_q-layer.11": 569.7717, "encoder_q-layer.2": 12335.3779, "encoder_q-layer.3": 12795.417, "encoder_q-layer.4": 11798.5107, "encoder_q-layer.5": 7380.3608, "encoder_q-layer.6": 5857.6577, "encoder_q-layer.7": 3896.929, "encoder_q-layer.8": 1864.6158, "encoder_q-layer.9": 674.1857, "epoch": 0.08, "inbatch_neg_score": 0.4828, "inbatch_pos_score": 0.9756, "learning_rate": 4.883333333333334e-05, "loss": 4.31, "norm_diff": 0.2764, "norm_loss": 0.0, "num_token_doc": 66.7452, "num_token_overlap": 11.7066, "num_token_query": 31.4588, "num_token_union": 65.1486, "num_word_context": 202.2339, "num_word_doc": 49.8462, "num_word_query": 23.3413, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13492.0195, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.479, "query_norm": 1.8964, "queue_k_norm": 1.6188, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4588, "sent_len_1": 66.7452, "sent_len_max_0": 127.4737, "sent_len_max_1": 188.465, "stdk": 0.047, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.3204, "doc_norm": 1.6179, "encoder_q-embeddings": 478.3845, "encoder_q-layer.0": 329.6757, "encoder_q-layer.1": 352.5796, "encoder_q-layer.10": 241.3986, "encoder_q-layer.11": 673.1399, "encoder_q-layer.2": 406.2075, "encoder_q-layer.3": 399.6085, "encoder_q-layer.4": 386.4756, "encoder_q-layer.5": 343.787, "encoder_q-layer.6": 279.8784, "encoder_q-layer.7": 274.3539, "encoder_q-layer.8": 289.1513, "encoder_q-layer.9": 241.9699, "epoch": 0.08, "inbatch_neg_score": 0.4894, "inbatch_pos_score": 1.0107, "learning_rate": 4.8777777777777775e-05, "loss": 4.3204, "norm_diff": 0.2901, "norm_loss": 0.0, "num_token_doc": 66.7202, "num_token_overlap": 11.6879, "num_token_query": 31.4745, "num_token_union": 65.1419, "num_word_context": 202.5244, "num_word_doc": 49.7888, "num_word_query": 23.3867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 561.9707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4844, "query_norm": 1.908, "queue_k_norm": 1.6171, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4745, "sent_len_1": 66.7202, "sent_len_max_0": 127.495, "sent_len_max_1": 190.0387, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.3153, "doc_norm": 1.6215, "encoder_q-embeddings": 218.8607, "encoder_q-layer.0": 147.1708, "encoder_q-layer.1": 153.1303, "encoder_q-layer.10": 226.6635, "encoder_q-layer.11": 572.0851, "encoder_q-layer.2": 170.83, "encoder_q-layer.3": 183.0303, "encoder_q-layer.4": 181.1532, "encoder_q-layer.5": 163.9246, "encoder_q-layer.6": 182.5994, "encoder_q-layer.7": 194.49, "encoder_q-layer.8": 205.4841, "encoder_q-layer.9": 174.5199, "epoch": 0.08, "inbatch_neg_score": 0.3677, "inbatch_pos_score": 0.8799, "learning_rate": 4.8722222222222224e-05, "loss": 4.3153, "norm_diff": 0.3229, "norm_loss": 0.0, "num_token_doc": 66.7254, "num_token_overlap": 11.6705, "num_token_query": 31.3002, "num_token_union": 65.0615, "num_word_context": 202.0884, "num_word_doc": 49.7993, "num_word_query": 23.2395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 337.8597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.9443, "queue_k_norm": 1.6226, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3002, "sent_len_1": 66.7254, "sent_len_max_0": 127.4788, "sent_len_max_1": 187.4375, "stdk": 0.0469, "stdq": 0.0411, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.2921, "doc_norm": 1.6273, "encoder_q-embeddings": 780.7837, "encoder_q-layer.0": 558.4963, "encoder_q-layer.1": 671.0079, "encoder_q-layer.10": 224.681, "encoder_q-layer.11": 543.6743, "encoder_q-layer.2": 849.0673, "encoder_q-layer.3": 1008.1297, "encoder_q-layer.4": 1070.7155, "encoder_q-layer.5": 1020.1064, "encoder_q-layer.6": 901.4705, "encoder_q-layer.7": 669.063, "encoder_q-layer.8": 424.114, "encoder_q-layer.9": 206.1032, "epoch": 0.08, "inbatch_neg_score": 0.3823, "inbatch_pos_score": 0.8823, "learning_rate": 4.866666666666667e-05, "loss": 4.2921, "norm_diff": 0.2641, "norm_loss": 0.0, "num_token_doc": 66.8238, "num_token_overlap": 11.6994, "num_token_query": 31.4454, "num_token_union": 65.2022, "num_word_context": 202.7705, "num_word_doc": 49.8962, "num_word_query": 23.3843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1069.4501, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3757, "query_norm": 1.8914, "queue_k_norm": 1.6251, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4454, "sent_len_1": 66.8238, "sent_len_max_0": 127.3113, "sent_len_max_1": 189.3237, "stdk": 0.0471, "stdq": 0.0413, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 39.0625, "active_queue_size": 16384.0, "cl_loss": 4.2984, "doc_norm": 1.6289, "encoder_q-embeddings": 486.1187, "encoder_q-layer.0": 353.102, "encoder_q-layer.1": 410.6901, "encoder_q-layer.10": 240.9695, "encoder_q-layer.11": 619.1785, "encoder_q-layer.2": 506.0652, "encoder_q-layer.3": 545.197, "encoder_q-layer.4": 574.4099, "encoder_q-layer.5": 570.3838, "encoder_q-layer.6": 596.4766, "encoder_q-layer.7": 604.9417, "encoder_q-layer.8": 465.5479, "encoder_q-layer.9": 240.6816, "epoch": 0.08, "inbatch_neg_score": 0.3656, "inbatch_pos_score": 0.8608, "learning_rate": 4.8611111111111115e-05, "loss": 4.2984, "norm_diff": 0.2785, "norm_loss": 0.0, "num_token_doc": 66.9674, "num_token_overlap": 11.6944, "num_token_query": 31.3991, "num_token_union": 65.2277, "num_word_context": 202.5804, "num_word_doc": 49.9736, "num_word_query": 23.3229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 720.6196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3567, "query_norm": 1.9074, "queue_k_norm": 1.6248, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3991, "sent_len_1": 66.9674, "sent_len_max_0": 127.6075, "sent_len_max_1": 191.0337, "stdk": 0.0471, "stdq": 0.0409, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.2852, "doc_norm": 1.6184, "encoder_q-embeddings": 168.6224, "encoder_q-layer.0": 107.8472, "encoder_q-layer.1": 108.3718, "encoder_q-layer.10": 207.6001, "encoder_q-layer.11": 547.3237, "encoder_q-layer.2": 116.7526, "encoder_q-layer.3": 122.4195, "encoder_q-layer.4": 128.7541, "encoder_q-layer.5": 127.2549, "encoder_q-layer.6": 143.895, "encoder_q-layer.7": 159.3026, "encoder_q-layer.8": 190.5564, "encoder_q-layer.9": 175.3186, "epoch": 0.08, "inbatch_neg_score": 0.2907, "inbatch_pos_score": 0.79, "learning_rate": 4.855555555555556e-05, "loss": 4.2852, "norm_diff": 0.2892, "norm_loss": 0.0, "num_token_doc": 66.5659, "num_token_overlap": 11.6027, "num_token_query": 31.2443, "num_token_union": 64.9884, "num_word_context": 202.3229, "num_word_doc": 49.684, "num_word_query": 23.1861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 300.4639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.9076, "queue_k_norm": 1.6227, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2443, "sent_len_1": 66.5659, "sent_len_max_0": 127.3713, "sent_len_max_1": 190.15, "stdk": 0.0468, "stdq": 0.0404, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.2544, "doc_norm": 1.6219, "encoder_q-embeddings": 277.0066, "encoder_q-layer.0": 189.3884, "encoder_q-layer.1": 214.8144, "encoder_q-layer.10": 214.2497, "encoder_q-layer.11": 617.4454, "encoder_q-layer.2": 236.9492, "encoder_q-layer.3": 256.392, "encoder_q-layer.4": 276.3355, "encoder_q-layer.5": 254.5683, "encoder_q-layer.6": 242.9601, "encoder_q-layer.7": 240.6158, "encoder_q-layer.8": 210.2596, "encoder_q-layer.9": 182.0165, "epoch": 0.08, "inbatch_neg_score": 0.2803, "inbatch_pos_score": 0.8232, "learning_rate": 4.85e-05, "loss": 4.2544, "norm_diff": 0.3649, "norm_loss": 0.0, "num_token_doc": 66.962, "num_token_overlap": 11.6691, "num_token_query": 31.4563, "num_token_union": 65.31, "num_word_context": 202.9312, "num_word_doc": 50.0057, "num_word_query": 23.3842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 409.1134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2712, "query_norm": 1.9868, "queue_k_norm": 1.6211, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4563, "sent_len_1": 66.962, "sent_len_max_0": 127.5, "sent_len_max_1": 190.2125, "stdk": 0.0471, "stdq": 0.0427, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.2271, "doc_norm": 1.6191, "encoder_q-embeddings": 175.2362, "encoder_q-layer.0": 111.0677, "encoder_q-layer.1": 114.1815, "encoder_q-layer.10": 233.1971, "encoder_q-layer.11": 662.4108, "encoder_q-layer.2": 124.484, "encoder_q-layer.3": 132.7778, "encoder_q-layer.4": 137.9388, "encoder_q-layer.5": 138.6385, "encoder_q-layer.6": 159.9353, "encoder_q-layer.7": 172.6203, "encoder_q-layer.8": 212.4399, "encoder_q-layer.9": 193.9608, "epoch": 0.08, "inbatch_neg_score": 0.23, "inbatch_pos_score": 0.7334, "learning_rate": 4.844444444444445e-05, "loss": 4.2271, "norm_diff": 0.3995, "norm_loss": 0.0, "num_token_doc": 66.702, "num_token_overlap": 11.6336, "num_token_query": 31.2883, "num_token_union": 65.084, "num_word_context": 201.9804, "num_word_doc": 49.7333, "num_word_query": 23.2314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 334.1691, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2205, "query_norm": 2.0186, "queue_k_norm": 1.6188, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2883, "sent_len_1": 66.702, "sent_len_max_0": 127.4412, "sent_len_max_1": 190.2562, "stdk": 0.0471, "stdq": 0.0415, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.2266, "doc_norm": 1.6182, "encoder_q-embeddings": 184.4989, "encoder_q-layer.0": 112.5677, "encoder_q-layer.1": 121.2453, "encoder_q-layer.10": 283.5943, "encoder_q-layer.11": 714.881, "encoder_q-layer.2": 136.5528, "encoder_q-layer.3": 147.0275, "encoder_q-layer.4": 159.3397, "encoder_q-layer.5": 170.6081, "encoder_q-layer.6": 188.5624, "encoder_q-layer.7": 204.031, "encoder_q-layer.8": 246.0656, "encoder_q-layer.9": 224.6888, "epoch": 0.08, "inbatch_neg_score": 0.2146, "inbatch_pos_score": 0.7231, "learning_rate": 4.838888888888889e-05, "loss": 4.2266, "norm_diff": 0.3858, "norm_loss": 0.0, "num_token_doc": 66.7544, "num_token_overlap": 11.6172, "num_token_query": 31.3508, "num_token_union": 65.1425, "num_word_context": 202.2816, "num_word_doc": 49.8399, "num_word_query": 23.2892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 362.653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2063, "query_norm": 2.004, "queue_k_norm": 1.6155, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3508, "sent_len_1": 66.7544, "sent_len_max_0": 127.63, "sent_len_max_1": 190.2287, "stdk": 0.0472, "stdq": 0.0399, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1961, "doc_norm": 1.6124, "encoder_q-embeddings": 173.7633, "encoder_q-layer.0": 104.4436, "encoder_q-layer.1": 106.269, "encoder_q-layer.10": 232.191, "encoder_q-layer.11": 701.1664, "encoder_q-layer.2": 117.6238, "encoder_q-layer.3": 126.9741, "encoder_q-layer.4": 138.7611, "encoder_q-layer.5": 144.1816, "encoder_q-layer.6": 165.6458, "encoder_q-layer.7": 179.1816, "encoder_q-layer.8": 215.7431, "encoder_q-layer.9": 199.6493, "epoch": 0.08, "inbatch_neg_score": 0.194, "inbatch_pos_score": 0.7085, "learning_rate": 4.8333333333333334e-05, "loss": 4.1961, "norm_diff": 0.4221, "norm_loss": 0.0, "num_token_doc": 66.9594, "num_token_overlap": 11.7307, "num_token_query": 31.3857, "num_token_union": 65.186, "num_word_context": 202.6229, "num_word_doc": 49.9692, "num_word_query": 23.3097, "postclip_grad_norm": 1.0, "preclip_grad_norm": 356.4215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1831, "query_norm": 2.0344, "queue_k_norm": 1.6125, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3857, "sent_len_1": 66.9594, "sent_len_max_0": 127.5637, "sent_len_max_1": 191.1525, "stdk": 0.0473, "stdq": 0.041, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.183, "doc_norm": 1.6033, "encoder_q-embeddings": 295.2609, "encoder_q-layer.0": 212.2771, "encoder_q-layer.1": 216.4682, "encoder_q-layer.10": 208.7536, "encoder_q-layer.11": 584.78, "encoder_q-layer.2": 261.603, "encoder_q-layer.3": 281.0782, "encoder_q-layer.4": 304.7971, "encoder_q-layer.5": 288.0836, "encoder_q-layer.6": 304.0645, "encoder_q-layer.7": 354.7305, "encoder_q-layer.8": 271.6853, "encoder_q-layer.9": 180.9975, "epoch": 0.09, "inbatch_neg_score": 0.1991, "inbatch_pos_score": 0.7134, "learning_rate": 4.8277777777777776e-05, "loss": 4.183, "norm_diff": 0.4137, "norm_loss": 0.0, "num_token_doc": 66.6642, "num_token_overlap": 11.7157, "num_token_query": 31.4469, "num_token_union": 65.0289, "num_word_context": 202.0048, "num_word_doc": 49.714, "num_word_query": 23.3703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 431.8955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1904, "query_norm": 2.0171, "queue_k_norm": 1.6041, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4469, "sent_len_1": 66.6642, "sent_len_max_0": 127.5775, "sent_len_max_1": 191.5112, "stdk": 0.0471, "stdq": 0.0403, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.1862, "doc_norm": 1.5893, "encoder_q-embeddings": 154.1, "encoder_q-layer.0": 99.1639, "encoder_q-layer.1": 97.4632, "encoder_q-layer.10": 210.6855, "encoder_q-layer.11": 580.5246, "encoder_q-layer.2": 106.3912, "encoder_q-layer.3": 109.6356, "encoder_q-layer.4": 118.1557, "encoder_q-layer.5": 117.463, "encoder_q-layer.6": 135.2625, "encoder_q-layer.7": 163.8631, "encoder_q-layer.8": 196.1285, "encoder_q-layer.9": 185.9773, "epoch": 0.09, "inbatch_neg_score": 0.1641, "inbatch_pos_score": 0.6929, "learning_rate": 4.8222222222222225e-05, "loss": 4.1862, "norm_diff": 0.4486, "norm_loss": 0.0, "num_token_doc": 66.6488, "num_token_overlap": 11.6613, "num_token_query": 31.42, "num_token_union": 65.1287, "num_word_context": 202.392, "num_word_doc": 49.7679, "num_word_query": 23.3376, "postclip_grad_norm": 1.0, "preclip_grad_norm": 293.096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1536, "query_norm": 2.0379, "queue_k_norm": 1.5937, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.42, "sent_len_1": 66.6488, "sent_len_max_0": 127.5387, "sent_len_max_1": 188.415, "stdk": 0.0469, "stdq": 0.0412, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.1362, "doc_norm": 1.5819, "encoder_q-embeddings": 367.1234, "encoder_q-layer.0": 251.3985, "encoder_q-layer.1": 284.3659, "encoder_q-layer.10": 222.4578, "encoder_q-layer.11": 566.2282, "encoder_q-layer.2": 339.6045, "encoder_q-layer.3": 378.3043, "encoder_q-layer.4": 435.0604, "encoder_q-layer.5": 480.112, "encoder_q-layer.6": 555.4835, "encoder_q-layer.7": 600.642, "encoder_q-layer.8": 478.1889, "encoder_q-layer.9": 213.8392, "epoch": 0.09, "inbatch_neg_score": 0.1672, "inbatch_pos_score": 0.6943, "learning_rate": 4.8166666666666674e-05, "loss": 4.1362, "norm_diff": 0.442, "norm_loss": 0.0, "num_token_doc": 66.7998, "num_token_overlap": 11.6367, "num_token_query": 31.2924, "num_token_union": 65.1294, "num_word_context": 202.1296, "num_word_doc": 49.8157, "num_word_query": 23.2439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 604.0085, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1593, "query_norm": 2.0239, "queue_k_norm": 1.581, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2924, "sent_len_1": 66.7998, "sent_len_max_0": 127.6462, "sent_len_max_1": 190.6012, "stdk": 0.0471, "stdq": 0.0411, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.1469, "doc_norm": 1.5611, "encoder_q-embeddings": 195.6711, "encoder_q-layer.0": 134.1119, "encoder_q-layer.1": 148.134, "encoder_q-layer.10": 220.6054, "encoder_q-layer.11": 586.3428, "encoder_q-layer.2": 170.9313, "encoder_q-layer.3": 191.5763, "encoder_q-layer.4": 222.1314, "encoder_q-layer.5": 207.5739, "encoder_q-layer.6": 254.7952, "encoder_q-layer.7": 244.91, "encoder_q-layer.8": 235.7031, "encoder_q-layer.9": 184.1982, "epoch": 0.09, "inbatch_neg_score": 0.1992, "inbatch_pos_score": 0.7329, "learning_rate": 4.811111111111111e-05, "loss": 4.1469, "norm_diff": 0.4546, "norm_loss": 0.0, "num_token_doc": 66.7865, "num_token_overlap": 11.6278, "num_token_query": 31.2732, "num_token_union": 65.0644, "num_word_context": 202.3133, "num_word_doc": 49.7849, "num_word_query": 23.2112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 356.619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1912, "query_norm": 2.0157, "queue_k_norm": 1.5634, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2732, "sent_len_1": 66.7865, "sent_len_max_0": 127.5062, "sent_len_max_1": 190.7637, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.1404, "doc_norm": 1.5415, "encoder_q-embeddings": 167.2921, "encoder_q-layer.0": 109.3327, "encoder_q-layer.1": 113.5025, "encoder_q-layer.10": 204.2593, "encoder_q-layer.11": 569.0875, "encoder_q-layer.2": 127.5551, "encoder_q-layer.3": 132.3395, "encoder_q-layer.4": 136.1385, "encoder_q-layer.5": 134.5279, "encoder_q-layer.6": 156.1323, "encoder_q-layer.7": 170.4422, "encoder_q-layer.8": 185.135, "encoder_q-layer.9": 172.3606, "epoch": 0.09, "inbatch_neg_score": 0.1993, "inbatch_pos_score": 0.7095, "learning_rate": 4.805555555555556e-05, "loss": 4.1404, "norm_diff": 0.4727, "norm_loss": 0.0, "num_token_doc": 66.8135, "num_token_overlap": 11.6785, "num_token_query": 31.329, "num_token_union": 65.1329, "num_word_context": 202.457, "num_word_doc": 49.8645, "num_word_query": 23.2524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 303.1862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1884, "query_norm": 2.0142, "queue_k_norm": 1.5457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.329, "sent_len_1": 66.8135, "sent_len_max_0": 127.5362, "sent_len_max_1": 191.3862, "stdk": 0.0465, "stdq": 0.0411, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.119, "doc_norm": 1.53, "encoder_q-embeddings": 317.6852, "encoder_q-layer.0": 208.5282, "encoder_q-layer.1": 214.434, "encoder_q-layer.10": 418.2434, "encoder_q-layer.11": 1041.5386, "encoder_q-layer.2": 227.7962, "encoder_q-layer.3": 235.8649, "encoder_q-layer.4": 249.5224, "encoder_q-layer.5": 250.7153, "encoder_q-layer.6": 288.8141, "encoder_q-layer.7": 309.0522, "encoder_q-layer.8": 379.6423, "encoder_q-layer.9": 355.5394, "epoch": 0.09, "inbatch_neg_score": 0.1997, "inbatch_pos_score": 0.7129, "learning_rate": 4.8e-05, "loss": 4.119, "norm_diff": 0.5611, "norm_loss": 0.0, "num_token_doc": 66.453, "num_token_overlap": 11.5879, "num_token_query": 31.3728, "num_token_union": 65.0044, "num_word_context": 201.9258, "num_word_doc": 49.5732, "num_word_query": 23.284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 565.5772, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1866, "query_norm": 2.0911, "queue_k_norm": 1.5317, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3728, "sent_len_1": 66.453, "sent_len_max_0": 127.3175, "sent_len_max_1": 191.5863, "stdk": 0.0465, "stdq": 0.0411, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.0904, "doc_norm": 1.5168, "encoder_q-embeddings": 334.8596, "encoder_q-layer.0": 214.1033, "encoder_q-layer.1": 237.5063, "encoder_q-layer.10": 388.3374, "encoder_q-layer.11": 1030.8599, "encoder_q-layer.2": 266.5239, "encoder_q-layer.3": 297.4079, "encoder_q-layer.4": 328.6571, "encoder_q-layer.5": 330.5549, "encoder_q-layer.6": 394.7219, "encoder_q-layer.7": 430.2151, "encoder_q-layer.8": 420.0129, "encoder_q-layer.9": 352.2639, "epoch": 0.09, "inbatch_neg_score": 0.2191, "inbatch_pos_score": 0.7573, "learning_rate": 4.794444444444445e-05, "loss": 4.0904, "norm_diff": 0.645, "norm_loss": 0.0, "num_token_doc": 66.8336, "num_token_overlap": 11.6602, "num_token_query": 31.4688, "num_token_union": 65.2249, "num_word_context": 202.5762, "num_word_doc": 49.8759, "num_word_query": 23.3809, "postclip_grad_norm": 1.0, "preclip_grad_norm": 601.3629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2051, "query_norm": 2.1617, "queue_k_norm": 1.5199, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4688, "sent_len_1": 66.8336, "sent_len_max_0": 127.5738, "sent_len_max_1": 188.3812, "stdk": 0.0464, "stdq": 0.0415, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.0768, "doc_norm": 1.5058, "encoder_q-embeddings": 322.6227, "encoder_q-layer.0": 202.5038, "encoder_q-layer.1": 213.0674, "encoder_q-layer.10": 384.2502, "encoder_q-layer.11": 1024.9121, "encoder_q-layer.2": 233.1662, "encoder_q-layer.3": 244.2247, "encoder_q-layer.4": 255.5154, "encoder_q-layer.5": 256.9219, "encoder_q-layer.6": 294.5076, "encoder_q-layer.7": 325.3485, "encoder_q-layer.8": 375.3231, "encoder_q-layer.9": 344.1721, "epoch": 0.09, "inbatch_neg_score": 0.2727, "inbatch_pos_score": 0.7944, "learning_rate": 4.7888888888888886e-05, "loss": 4.0768, "norm_diff": 0.7123, "norm_loss": 0.0, "num_token_doc": 66.8974, "num_token_overlap": 11.6794, "num_token_query": 31.322, "num_token_union": 65.1547, "num_word_context": 202.2866, "num_word_doc": 49.9242, "num_word_query": 23.2699, "postclip_grad_norm": 1.0, "preclip_grad_norm": 562.0683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 2.2182, "queue_k_norm": 1.5072, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.322, "sent_len_1": 66.8974, "sent_len_max_0": 127.4475, "sent_len_max_1": 190.1413, "stdk": 0.0464, "stdq": 0.0419, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.0582, "doc_norm": 1.4953, "encoder_q-embeddings": 301.4027, "encoder_q-layer.0": 201.2521, "encoder_q-layer.1": 198.4894, "encoder_q-layer.10": 377.1788, "encoder_q-layer.11": 1051.0576, "encoder_q-layer.2": 215.4608, "encoder_q-layer.3": 221.971, "encoder_q-layer.4": 231.4843, "encoder_q-layer.5": 238.989, "encoder_q-layer.6": 275.6223, "encoder_q-layer.7": 296.7495, "encoder_q-layer.8": 357.674, "encoder_q-layer.9": 325.023, "epoch": 0.09, "inbatch_neg_score": 0.3183, "inbatch_pos_score": 0.8608, "learning_rate": 4.7833333333333335e-05, "loss": 4.0582, "norm_diff": 0.7893, "norm_loss": 0.0, "num_token_doc": 66.7716, "num_token_overlap": 11.6087, "num_token_query": 31.2016, "num_token_union": 65.0394, "num_word_context": 202.2553, "num_word_doc": 49.8263, "num_word_query": 23.1519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.1829, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3035, "query_norm": 2.2846, "queue_k_norm": 1.496, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2016, "sent_len_1": 66.7716, "sent_len_max_0": 127.6287, "sent_len_max_1": 190.6538, "stdk": 0.0463, "stdq": 0.0426, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.0303, "doc_norm": 1.4913, "encoder_q-embeddings": 312.292, "encoder_q-layer.0": 199.8091, "encoder_q-layer.1": 208.6362, "encoder_q-layer.10": 388.8409, "encoder_q-layer.11": 1149.9507, "encoder_q-layer.2": 226.471, "encoder_q-layer.3": 224.7833, "encoder_q-layer.4": 243.8522, "encoder_q-layer.5": 251.9605, "encoder_q-layer.6": 279.4806, "encoder_q-layer.7": 333.5609, "encoder_q-layer.8": 371.3035, "encoder_q-layer.9": 331.1674, "epoch": 0.09, "inbatch_neg_score": 0.4023, "inbatch_pos_score": 0.9194, "learning_rate": 4.7777777777777784e-05, "loss": 4.0303, "norm_diff": 0.8818, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 11.6323, "num_token_query": 31.2536, "num_token_union": 64.9797, "num_word_context": 202.1215, "num_word_doc": 49.7023, "num_word_query": 23.2096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 605.3858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3855, "query_norm": 2.373, "queue_k_norm": 1.4886, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2536, "sent_len_1": 66.6695, "sent_len_max_0": 127.445, "sent_len_max_1": 189.4938, "stdk": 0.0463, "stdq": 0.0414, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.0105, "doc_norm": 1.4786, "encoder_q-embeddings": 317.1642, "encoder_q-layer.0": 195.5773, "encoder_q-layer.1": 200.9057, "encoder_q-layer.10": 418.4061, "encoder_q-layer.11": 1154.8893, "encoder_q-layer.2": 218.3671, "encoder_q-layer.3": 226.9542, "encoder_q-layer.4": 236.0973, "encoder_q-layer.5": 234.5791, "encoder_q-layer.6": 273.1447, "encoder_q-layer.7": 300.5866, "encoder_q-layer.8": 374.3148, "encoder_q-layer.9": 330.0453, "epoch": 0.09, "inbatch_neg_score": 0.5127, "inbatch_pos_score": 1.0088, "learning_rate": 4.7722222222222226e-05, "loss": 4.0105, "norm_diff": 0.9992, "norm_loss": 0.0, "num_token_doc": 66.8157, "num_token_overlap": 11.6277, "num_token_query": 31.3418, "num_token_union": 65.1867, "num_word_context": 202.6115, "num_word_doc": 49.8326, "num_word_query": 23.2669, "postclip_grad_norm": 1.0, "preclip_grad_norm": 602.2215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4919, "query_norm": 2.4778, "queue_k_norm": 1.4838, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3418, "sent_len_1": 66.8157, "sent_len_max_0": 127.4325, "sent_len_max_1": 190.5863, "stdk": 0.0458, "stdq": 0.0413, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 3.983, "doc_norm": 1.4804, "encoder_q-embeddings": 332.9064, "encoder_q-layer.0": 209.5097, "encoder_q-layer.1": 211.7673, "encoder_q-layer.10": 397.1964, "encoder_q-layer.11": 1156.7002, "encoder_q-layer.2": 227.5119, "encoder_q-layer.3": 243.7005, "encoder_q-layer.4": 250.5387, "encoder_q-layer.5": 239.4563, "encoder_q-layer.6": 268.782, "encoder_q-layer.7": 297.7426, "encoder_q-layer.8": 381.8999, "encoder_q-layer.9": 338.0317, "epoch": 0.09, "inbatch_neg_score": 0.6297, "inbatch_pos_score": 1.1377, "learning_rate": 4.766666666666667e-05, "loss": 3.983, "norm_diff": 1.135, "norm_loss": 0.0, "num_token_doc": 66.4601, "num_token_overlap": 11.6721, "num_token_query": 31.4218, "num_token_union": 64.9964, "num_word_context": 202.1791, "num_word_doc": 49.6157, "num_word_query": 23.3427, "postclip_grad_norm": 1.0, "preclip_grad_norm": 602.1104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 2.6155, "queue_k_norm": 1.4811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4218, "sent_len_1": 66.4601, "sent_len_max_0": 127.4375, "sent_len_max_1": 188.7088, "stdk": 0.0458, "stdq": 0.0417, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9441, "doc_norm": 1.4868, "encoder_q-embeddings": 301.9677, "encoder_q-layer.0": 192.6337, "encoder_q-layer.1": 188.2009, "encoder_q-layer.10": 330.6811, "encoder_q-layer.11": 967.3118, "encoder_q-layer.2": 200.6568, "encoder_q-layer.3": 202.422, "encoder_q-layer.4": 209.7555, "encoder_q-layer.5": 212.3474, "encoder_q-layer.6": 255.0321, "encoder_q-layer.7": 269.3316, "encoder_q-layer.8": 312.4057, "encoder_q-layer.9": 279.0658, "epoch": 0.09, "inbatch_neg_score": 0.8303, "inbatch_pos_score": 1.3535, "learning_rate": 4.761111111111111e-05, "loss": 3.9441, "norm_diff": 1.3539, "norm_loss": 0.0, "num_token_doc": 66.7474, "num_token_overlap": 11.6831, "num_token_query": 31.3394, "num_token_union": 65.0806, "num_word_context": 202.597, "num_word_doc": 49.7572, "num_word_query": 23.2534, "postclip_grad_norm": 1.0, "preclip_grad_norm": 517.0323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8047, "query_norm": 2.8407, "queue_k_norm": 1.4846, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3394, "sent_len_1": 66.7474, "sent_len_max_0": 127.4762, "sent_len_max_1": 191.54, "stdk": 0.0458, "stdq": 0.0409, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.936, "doc_norm": 1.4897, "encoder_q-embeddings": 297.9242, "encoder_q-layer.0": 184.2117, "encoder_q-layer.1": 190.8195, "encoder_q-layer.10": 408.3604, "encoder_q-layer.11": 1067.1229, "encoder_q-layer.2": 212.5764, "encoder_q-layer.3": 229.8883, "encoder_q-layer.4": 240.4927, "encoder_q-layer.5": 243.503, "encoder_q-layer.6": 290.9296, "encoder_q-layer.7": 318.2519, "encoder_q-layer.8": 396.8415, "encoder_q-layer.9": 360.7882, "epoch": 0.09, "inbatch_neg_score": 1.0563, "inbatch_pos_score": 1.6035, "learning_rate": 4.755555555555556e-05, "loss": 3.936, "norm_diff": 1.5795, "norm_loss": 0.0, "num_token_doc": 66.8886, "num_token_overlap": 11.6917, "num_token_query": 31.3153, "num_token_union": 65.1546, "num_word_context": 201.8637, "num_word_doc": 49.9128, "num_word_query": 23.2717, "postclip_grad_norm": 1.0, "preclip_grad_norm": 561.9155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0254, "query_norm": 3.0691, "queue_k_norm": 1.4935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3153, "sent_len_1": 66.8886, "sent_len_max_0": 127.46, "sent_len_max_1": 188.1975, "stdk": 0.0454, "stdq": 0.0438, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9475, "doc_norm": 1.5153, "encoder_q-embeddings": 280.1198, "encoder_q-layer.0": 178.2286, "encoder_q-layer.1": 178.7622, "encoder_q-layer.10": 437.4136, "encoder_q-layer.11": 944.4221, "encoder_q-layer.2": 206.6804, "encoder_q-layer.3": 215.498, "encoder_q-layer.4": 227.8084, "encoder_q-layer.5": 225.8816, "encoder_q-layer.6": 275.5679, "encoder_q-layer.7": 329.5649, "encoder_q-layer.8": 371.9364, "encoder_q-layer.9": 337.7792, "epoch": 0.09, "inbatch_neg_score": 1.3444, "inbatch_pos_score": 1.8525, "learning_rate": 4.75e-05, "loss": 3.9475, "norm_diff": 1.6926, "norm_loss": 0.0, "num_token_doc": 66.581, "num_token_overlap": 11.6753, "num_token_query": 31.3681, "num_token_union": 64.9775, "num_word_context": 202.0686, "num_word_doc": 49.6426, "num_word_query": 23.2833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 538.7458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3105, "query_norm": 3.2078, "queue_k_norm": 1.5115, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3681, "sent_len_1": 66.581, "sent_len_max_0": 127.4475, "sent_len_max_1": 190.4787, "stdk": 0.0457, "stdq": 0.0407, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.0083, "doc_norm": 1.5405, "encoder_q-embeddings": 294.2544, "encoder_q-layer.0": 188.0879, "encoder_q-layer.1": 192.3069, "encoder_q-layer.10": 407.2005, "encoder_q-layer.11": 971.0191, "encoder_q-layer.2": 211.0499, "encoder_q-layer.3": 219.6028, "encoder_q-layer.4": 234.2975, "encoder_q-layer.5": 239.3068, "encoder_q-layer.6": 281.5209, "encoder_q-layer.7": 309.505, "encoder_q-layer.8": 371.6061, "encoder_q-layer.9": 340.4289, "epoch": 0.1, "inbatch_neg_score": 1.6226, "inbatch_pos_score": 2.1191, "learning_rate": 4.7444444444444445e-05, "loss": 4.0083, "norm_diff": 1.6891, "norm_loss": 0.0, "num_token_doc": 66.7041, "num_token_overlap": 11.6603, "num_token_query": 31.3899, "num_token_union": 65.0961, "num_word_context": 201.9952, "num_word_doc": 49.7708, "num_word_query": 23.3189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 535.8959, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.5898, "query_norm": 3.2296, "queue_k_norm": 1.5369, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3899, "sent_len_1": 66.7041, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.0563, "stdk": 0.0456, "stdq": 0.0419, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0743, "doc_norm": 1.5701, "encoder_q-embeddings": 322.481, "encoder_q-layer.0": 228.7927, "encoder_q-layer.1": 235.7998, "encoder_q-layer.10": 375.6318, "encoder_q-layer.11": 1022.7432, "encoder_q-layer.2": 277.0523, "encoder_q-layer.3": 287.0527, "encoder_q-layer.4": 283.2327, "encoder_q-layer.5": 293.4836, "encoder_q-layer.6": 326.9604, "encoder_q-layer.7": 347.9978, "encoder_q-layer.8": 381.7783, "encoder_q-layer.9": 338.5402, "epoch": 0.1, "inbatch_neg_score": 1.7994, "inbatch_pos_score": 2.3281, "learning_rate": 4.7388888888888894e-05, "loss": 4.0743, "norm_diff": 1.6328, "norm_loss": 0.0, "num_token_doc": 66.9277, "num_token_overlap": 11.637, "num_token_query": 31.2391, "num_token_union": 65.1764, "num_word_context": 202.4492, "num_word_doc": 49.9424, "num_word_query": 23.1807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 586.9959, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.7676, "query_norm": 3.2029, "queue_k_norm": 1.5696, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2391, "sent_len_1": 66.9277, "sent_len_max_0": 127.38, "sent_len_max_1": 188.1125, "stdk": 0.0454, "stdq": 0.0438, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1326, "doc_norm": 1.6095, "encoder_q-embeddings": 855.6896, "encoder_q-layer.0": 565.9187, "encoder_q-layer.1": 682.9757, "encoder_q-layer.10": 372.6512, "encoder_q-layer.11": 1054.0031, "encoder_q-layer.2": 791.5149, "encoder_q-layer.3": 865.9908, "encoder_q-layer.4": 1009.0903, "encoder_q-layer.5": 1074.4965, "encoder_q-layer.6": 986.7878, "encoder_q-layer.7": 896.1556, "encoder_q-layer.8": 453.6829, "encoder_q-layer.9": 321.1192, "epoch": 0.1, "inbatch_neg_score": 1.8413, "inbatch_pos_score": 2.3574, "learning_rate": 4.7333333333333336e-05, "loss": 4.1326, "norm_diff": 1.2997, "norm_loss": 0.0, "num_token_doc": 66.788, "num_token_overlap": 11.6415, "num_token_query": 31.3363, "num_token_union": 65.1425, "num_word_context": 202.0926, "num_word_doc": 49.8357, "num_word_query": 23.2804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1173.4997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.8125, "query_norm": 2.9092, "queue_k_norm": 1.6042, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3363, "sent_len_1": 66.788, "sent_len_max_0": 127.535, "sent_len_max_1": 189.6087, "stdk": 0.0456, "stdq": 0.0438, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.2192, "doc_norm": 1.6434, "encoder_q-embeddings": 914.5464, "encoder_q-layer.0": 591.7772, "encoder_q-layer.1": 678.8696, "encoder_q-layer.10": 374.3077, "encoder_q-layer.11": 1284.1171, "encoder_q-layer.2": 763.1987, "encoder_q-layer.3": 840.915, "encoder_q-layer.4": 980.1602, "encoder_q-layer.5": 869.3123, "encoder_q-layer.6": 976.2846, "encoder_q-layer.7": 780.2358, "encoder_q-layer.8": 444.9568, "encoder_q-layer.9": 331.5107, "epoch": 0.1, "inbatch_neg_score": 1.8519, "inbatch_pos_score": 2.3398, "learning_rate": 4.727777777777778e-05, "loss": 4.2192, "norm_diff": 1.0478, "norm_loss": 0.0, "num_token_doc": 66.7762, "num_token_overlap": 11.7145, "num_token_query": 31.4865, "num_token_union": 65.1564, "num_word_context": 202.4441, "num_word_doc": 49.8315, "num_word_query": 23.4014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1201.0304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.8281, "query_norm": 2.6912, "queue_k_norm": 1.641, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4865, "sent_len_1": 66.7762, "sent_len_max_0": 127.56, "sent_len_max_1": 189.76, "stdk": 0.0455, "stdq": 0.0434, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.276, "doc_norm": 1.6804, "encoder_q-embeddings": 1364.183, "encoder_q-layer.0": 1019.2643, "encoder_q-layer.1": 1152.5861, "encoder_q-layer.10": 395.4765, "encoder_q-layer.11": 1401.205, "encoder_q-layer.2": 1072.6539, "encoder_q-layer.3": 1110.7225, "encoder_q-layer.4": 1215.1963, "encoder_q-layer.5": 1025.4656, "encoder_q-layer.6": 1211.3684, "encoder_q-layer.7": 825.4375, "encoder_q-layer.8": 421.6929, "encoder_q-layer.9": 339.5263, "epoch": 0.1, "inbatch_neg_score": 1.9125, "inbatch_pos_score": 2.4434, "learning_rate": 4.722222222222222e-05, "loss": 4.276, "norm_diff": 1.0005, "norm_loss": 0.0, "num_token_doc": 66.6785, "num_token_overlap": 11.6264, "num_token_query": 31.3179, "num_token_union": 65.0433, "num_word_context": 202.1886, "num_word_doc": 49.7481, "num_word_query": 23.2458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1568.2132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.8936, "query_norm": 2.6809, "queue_k_norm": 1.6748, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3179, "sent_len_1": 66.6785, "sent_len_max_0": 127.3975, "sent_len_max_1": 189.1725, "stdk": 0.0457, "stdq": 0.0448, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.3026, "doc_norm": 1.7106, "encoder_q-embeddings": 1639.4872, "encoder_q-layer.0": 1148.0105, "encoder_q-layer.1": 1390.4769, "encoder_q-layer.10": 411.4196, "encoder_q-layer.11": 1353.1526, "encoder_q-layer.2": 1510.1252, "encoder_q-layer.3": 1569.9535, "encoder_q-layer.4": 1640.5775, "encoder_q-layer.5": 1636.5062, "encoder_q-layer.6": 1455.3153, "encoder_q-layer.7": 1086.6925, "encoder_q-layer.8": 608.5215, "encoder_q-layer.9": 371.4241, "epoch": 0.1, "inbatch_neg_score": 1.6725, "inbatch_pos_score": 2.1797, "learning_rate": 4.716666666666667e-05, "loss": 4.3026, "norm_diff": 0.6534, "norm_loss": 0.0, "num_token_doc": 66.8666, "num_token_overlap": 11.647, "num_token_query": 31.2827, "num_token_union": 65.1564, "num_word_context": 202.6528, "num_word_doc": 49.8603, "num_word_query": 23.2092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1956.623, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.6533, "query_norm": 2.364, "queue_k_norm": 1.7055, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2827, "sent_len_1": 66.8666, "sent_len_max_0": 127.5037, "sent_len_max_1": 190.235, "stdk": 0.0458, "stdq": 0.0437, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.3185, "doc_norm": 1.7334, "encoder_q-embeddings": 413.0594, "encoder_q-layer.0": 275.3899, "encoder_q-layer.1": 327.1408, "encoder_q-layer.10": 200.6979, "encoder_q-layer.11": 703.528, "encoder_q-layer.2": 383.5666, "encoder_q-layer.3": 434.9008, "encoder_q-layer.4": 503.8839, "encoder_q-layer.5": 555.1446, "encoder_q-layer.6": 612.1998, "encoder_q-layer.7": 541.8705, "encoder_q-layer.8": 300.0852, "encoder_q-layer.9": 189.5482, "epoch": 0.1, "inbatch_neg_score": 1.3601, "inbatch_pos_score": 1.8838, "learning_rate": 4.711111111111111e-05, "loss": 4.3185, "norm_diff": 0.3202, "norm_loss": 0.0, "num_token_doc": 66.6441, "num_token_overlap": 11.6429, "num_token_query": 31.2818, "num_token_union": 65.0343, "num_word_context": 202.5227, "num_word_doc": 49.7717, "num_word_query": 23.2354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 663.3702, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.3525, "query_norm": 2.0535, "queue_k_norm": 1.7313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2818, "sent_len_1": 66.6441, "sent_len_max_0": 127.5337, "sent_len_max_1": 187.8375, "stdk": 0.0459, "stdq": 0.0462, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2956, "doc_norm": 1.7467, "encoder_q-embeddings": 1375.9156, "encoder_q-layer.0": 1027.2855, "encoder_q-layer.1": 1061.8171, "encoder_q-layer.10": 222.9271, "encoder_q-layer.11": 643.1901, "encoder_q-layer.2": 1100.6982, "encoder_q-layer.3": 1031.2583, "encoder_q-layer.4": 1006.8261, "encoder_q-layer.5": 875.7535, "encoder_q-layer.6": 791.0827, "encoder_q-layer.7": 709.2346, "encoder_q-layer.8": 531.5905, "encoder_q-layer.9": 256.632, "epoch": 0.1, "inbatch_neg_score": 1.1226, "inbatch_pos_score": 1.6621, "learning_rate": 4.7055555555555555e-05, "loss": 4.2956, "norm_diff": 0.1501, "norm_loss": 0.0, "num_token_doc": 66.6891, "num_token_overlap": 11.7262, "num_token_query": 31.4531, "num_token_union": 65.0642, "num_word_context": 202.3601, "num_word_doc": 49.7978, "num_word_query": 23.3591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1351.3171, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1162, "query_norm": 1.8968, "queue_k_norm": 1.7491, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4531, "sent_len_1": 66.6891, "sent_len_max_0": 127.4887, "sent_len_max_1": 190.1287, "stdk": 0.0457, "stdq": 0.0448, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.2688, "doc_norm": 1.7676, "encoder_q-embeddings": 705.8589, "encoder_q-layer.0": 501.6999, "encoder_q-layer.1": 534.2538, "encoder_q-layer.10": 213.3451, "encoder_q-layer.11": 653.1569, "encoder_q-layer.2": 609.5911, "encoder_q-layer.3": 671.2292, "encoder_q-layer.4": 787.7353, "encoder_q-layer.5": 785.3096, "encoder_q-layer.6": 718.7473, "encoder_q-layer.7": 698.3932, "encoder_q-layer.8": 577.8335, "encoder_q-layer.9": 314.9633, "epoch": 0.1, "inbatch_neg_score": 1.1168, "inbatch_pos_score": 1.666, "learning_rate": 4.7e-05, "loss": 4.2688, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.7011, "num_token_overlap": 11.6577, "num_token_query": 31.3145, "num_token_union": 64.9901, "num_word_context": 202.3102, "num_word_doc": 49.7684, "num_word_query": 23.2489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 908.8798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.1094, "query_norm": 1.8788, "queue_k_norm": 1.7659, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3145, "sent_len_1": 66.7011, "sent_len_max_0": 127.3912, "sent_len_max_1": 188.5575, "stdk": 0.0459, "stdq": 0.045, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.3464, "doc_norm": 1.7823, "encoder_q-embeddings": 341.1169, "encoder_q-layer.0": 235.4181, "encoder_q-layer.1": 270.5798, "encoder_q-layer.10": 203.0379, "encoder_q-layer.11": 645.3408, "encoder_q-layer.2": 307.8049, "encoder_q-layer.3": 336.2921, "encoder_q-layer.4": 353.4934, "encoder_q-layer.5": 333.004, "encoder_q-layer.6": 333.694, "encoder_q-layer.7": 324.9255, "encoder_q-layer.8": 302.0583, "encoder_q-layer.9": 233.4032, "epoch": 0.1, "inbatch_neg_score": 0.9148, "inbatch_pos_score": 1.4365, "learning_rate": 4.6944444444444446e-05, "loss": 4.3464, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.574, "num_token_overlap": 11.6437, "num_token_query": 31.3239, "num_token_union": 64.9438, "num_word_context": 202.1022, "num_word_doc": 49.6255, "num_word_query": 23.2573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 505.8721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9102, "query_norm": 1.7923, "queue_k_norm": 1.775, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3239, "sent_len_1": 66.574, "sent_len_max_0": 127.5012, "sent_len_max_1": 191.6125, "stdk": 0.0463, "stdq": 0.0444, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.2627, "doc_norm": 1.7804, "encoder_q-embeddings": 796.403, "encoder_q-layer.0": 599.5275, "encoder_q-layer.1": 640.3733, "encoder_q-layer.10": 245.426, "encoder_q-layer.11": 637.8956, "encoder_q-layer.2": 749.1224, "encoder_q-layer.3": 854.0546, "encoder_q-layer.4": 865.3405, "encoder_q-layer.5": 760.2281, "encoder_q-layer.6": 841.6423, "encoder_q-layer.7": 907.7828, "encoder_q-layer.8": 831.5009, "encoder_q-layer.9": 498.0597, "epoch": 0.1, "inbatch_neg_score": 0.6992, "inbatch_pos_score": 1.2109, "learning_rate": 4.6888888888888895e-05, "loss": 4.2627, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.6978, "num_token_overlap": 11.7599, "num_token_query": 31.5434, "num_token_union": 65.1138, "num_word_context": 202.1689, "num_word_doc": 49.7644, "num_word_query": 23.4292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1083.301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6938, "query_norm": 1.7916, "queue_k_norm": 1.7819, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5434, "sent_len_1": 66.6978, "sent_len_max_0": 127.4125, "sent_len_max_1": 189.925, "stdk": 0.046, "stdq": 0.0448, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.293, "doc_norm": 1.7729, "encoder_q-embeddings": 722.9042, "encoder_q-layer.0": 525.7548, "encoder_q-layer.1": 556.8657, "encoder_q-layer.10": 226.6517, "encoder_q-layer.11": 700.2053, "encoder_q-layer.2": 640.2538, "encoder_q-layer.3": 652.8496, "encoder_q-layer.4": 703.5811, "encoder_q-layer.5": 619.8657, "encoder_q-layer.6": 578.3979, "encoder_q-layer.7": 555.696, "encoder_q-layer.8": 413.7215, "encoder_q-layer.9": 254.6791, "epoch": 0.1, "inbatch_neg_score": 0.762, "inbatch_pos_score": 1.292, "learning_rate": 4.683333333333334e-05, "loss": 4.293, "norm_diff": 0.0205, "norm_loss": 0.0, "num_token_doc": 66.7204, "num_token_overlap": 11.6617, "num_token_query": 31.3935, "num_token_union": 65.091, "num_word_context": 202.7364, "num_word_doc": 49.7693, "num_word_query": 23.321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 857.5652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7588, "query_norm": 1.7626, "queue_k_norm": 1.7801, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3935, "sent_len_1": 66.7204, "sent_len_max_0": 127.4262, "sent_len_max_1": 190.4437, "stdk": 0.0459, "stdq": 0.0443, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.2573, "doc_norm": 1.7745, "encoder_q-embeddings": 620.8301, "encoder_q-layer.0": 476.8386, "encoder_q-layer.1": 567.6379, "encoder_q-layer.10": 205.3648, "encoder_q-layer.11": 639.9458, "encoder_q-layer.2": 663.8678, "encoder_q-layer.3": 742.5129, "encoder_q-layer.4": 749.9503, "encoder_q-layer.5": 705.8915, "encoder_q-layer.6": 715.2797, "encoder_q-layer.7": 728.9113, "encoder_q-layer.8": 484.3076, "encoder_q-layer.9": 226.0797, "epoch": 0.1, "inbatch_neg_score": 0.8003, "inbatch_pos_score": 1.3252, "learning_rate": 4.677777777777778e-05, "loss": 4.2573, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.8913, "num_token_overlap": 11.6477, "num_token_query": 31.4048, "num_token_union": 65.206, "num_word_context": 202.2983, "num_word_doc": 49.8617, "num_word_query": 23.2994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 898.4082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7974, "query_norm": 1.7496, "queue_k_norm": 1.7743, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4048, "sent_len_1": 66.8913, "sent_len_max_0": 127.6063, "sent_len_max_1": 189.79, "stdk": 0.0464, "stdq": 0.0443, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.2662, "doc_norm": 1.7663, "encoder_q-embeddings": 1789.387, "encoder_q-layer.0": 1416.5577, "encoder_q-layer.1": 1641.3754, "encoder_q-layer.10": 206.3561, "encoder_q-layer.11": 808.734, "encoder_q-layer.2": 1907.8721, "encoder_q-layer.3": 1879.6638, "encoder_q-layer.4": 2047.4443, "encoder_q-layer.5": 1626.4691, "encoder_q-layer.6": 956.7148, "encoder_q-layer.7": 703.7056, "encoder_q-layer.8": 484.1981, "encoder_q-layer.9": 245.1279, "epoch": 0.1, "inbatch_neg_score": 0.8337, "inbatch_pos_score": 1.3398, "learning_rate": 4.672222222222222e-05, "loss": 4.2662, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.7772, "num_token_overlap": 11.6688, "num_token_query": 31.4436, "num_token_union": 65.1446, "num_word_context": 202.4643, "num_word_doc": 49.7952, "num_word_query": 23.3599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.8499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8291, "query_norm": 1.7857, "queue_k_norm": 1.7639, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4436, "sent_len_1": 66.7772, "sent_len_max_0": 127.5938, "sent_len_max_1": 191.7488, "stdk": 0.0467, "stdq": 0.0443, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.2382, "doc_norm": 1.7507, "encoder_q-embeddings": 3483.6301, "encoder_q-layer.0": 2601.4009, "encoder_q-layer.1": 2653.6423, "encoder_q-layer.10": 229.9957, "encoder_q-layer.11": 649.6105, "encoder_q-layer.2": 3082.709, "encoder_q-layer.3": 3539.8445, "encoder_q-layer.4": 4168.9673, "encoder_q-layer.5": 3251.4143, "encoder_q-layer.6": 1660.3065, "encoder_q-layer.7": 881.8855, "encoder_q-layer.8": 723.8781, "encoder_q-layer.9": 346.6394, "epoch": 0.1, "inbatch_neg_score": 0.7391, "inbatch_pos_score": 1.29, "learning_rate": 4.666666666666667e-05, "loss": 4.2382, "norm_diff": 0.0193, "norm_loss": 0.0, "num_token_doc": 66.5896, "num_token_overlap": 11.6648, "num_token_query": 31.3752, "num_token_union": 65.0073, "num_word_context": 202.3022, "num_word_doc": 49.6796, "num_word_query": 23.302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3790.4937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7383, "query_norm": 1.739, "queue_k_norm": 1.7501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3752, "sent_len_1": 66.5896, "sent_len_max_0": 127.5387, "sent_len_max_1": 189.0913, "stdk": 0.0465, "stdq": 0.045, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.2386, "doc_norm": 1.735, "encoder_q-embeddings": 2698.6553, "encoder_q-layer.0": 2129.332, "encoder_q-layer.1": 2333.4326, "encoder_q-layer.10": 219.5294, "encoder_q-layer.11": 628.6516, "encoder_q-layer.2": 2711.0769, "encoder_q-layer.3": 2559.2898, "encoder_q-layer.4": 2831.27, "encoder_q-layer.5": 1602.2108, "encoder_q-layer.6": 1021.5759, "encoder_q-layer.7": 858.0173, "encoder_q-layer.8": 598.4726, "encoder_q-layer.9": 319.2681, "epoch": 0.1, "inbatch_neg_score": 0.7392, "inbatch_pos_score": 1.249, "learning_rate": 4.6611111111111114e-05, "loss": 4.2386, "norm_diff": 0.0495, "norm_loss": 0.0, "num_token_doc": 66.6739, "num_token_overlap": 11.6899, "num_token_query": 31.4816, "num_token_union": 65.1406, "num_word_context": 202.0516, "num_word_doc": 49.7873, "num_word_query": 23.3875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2860.9044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7363, "query_norm": 1.6855, "queue_k_norm": 1.735, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4816, "sent_len_1": 66.6739, "sent_len_max_0": 127.4325, "sent_len_max_1": 188.7612, "stdk": 0.0467, "stdq": 0.0434, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.2803, "doc_norm": 1.7168, "encoder_q-embeddings": 667.4232, "encoder_q-layer.0": 520.1815, "encoder_q-layer.1": 626.0891, "encoder_q-layer.10": 190.0423, "encoder_q-layer.11": 584.774, "encoder_q-layer.2": 758.3527, "encoder_q-layer.3": 771.2419, "encoder_q-layer.4": 814.461, "encoder_q-layer.5": 783.7734, "encoder_q-layer.6": 763.1541, "encoder_q-layer.7": 657.0662, "encoder_q-layer.8": 428.723, "encoder_q-layer.9": 277.2421, "epoch": 0.11, "inbatch_neg_score": 0.772, "inbatch_pos_score": 1.2832, "learning_rate": 4.6555555555555556e-05, "loss": 4.2803, "norm_diff": 0.0496, "norm_loss": 0.0, "num_token_doc": 66.7418, "num_token_overlap": 11.6646, "num_token_query": 31.3494, "num_token_union": 65.0749, "num_word_context": 202.2836, "num_word_doc": 49.8279, "num_word_query": 23.2864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 925.7437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7686, "query_norm": 1.6672, "queue_k_norm": 1.7176, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3494, "sent_len_1": 66.7418, "sent_len_max_0": 127.5288, "sent_len_max_1": 189.1838, "stdk": 0.0465, "stdq": 0.0435, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2412, "doc_norm": 1.6936, "encoder_q-embeddings": 1884.8444, "encoder_q-layer.0": 1330.3751, "encoder_q-layer.1": 1539.6855, "encoder_q-layer.10": 238.6303, "encoder_q-layer.11": 641.5864, "encoder_q-layer.2": 1886.5292, "encoder_q-layer.3": 1858.9944, "encoder_q-layer.4": 1993.7457, "encoder_q-layer.5": 1983.0143, "encoder_q-layer.6": 1549.5593, "encoder_q-layer.7": 1187.807, "encoder_q-layer.8": 672.8042, "encoder_q-layer.9": 284.7604, "epoch": 0.11, "inbatch_neg_score": 0.7554, "inbatch_pos_score": 1.2812, "learning_rate": 4.6500000000000005e-05, "loss": 4.2412, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.6847, "num_token_overlap": 11.6871, "num_token_query": 31.4711, "num_token_union": 65.1145, "num_word_context": 202.1964, "num_word_doc": 49.7317, "num_word_query": 23.378, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2179.5302, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7529, "query_norm": 1.6797, "queue_k_norm": 1.7007, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4711, "sent_len_1": 66.6847, "sent_len_max_0": 127.615, "sent_len_max_1": 190.4525, "stdk": 0.0463, "stdq": 0.0447, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.2724, "doc_norm": 1.687, "encoder_q-embeddings": 2658.1655, "encoder_q-layer.0": 2261.092, "encoder_q-layer.1": 2392.5854, "encoder_q-layer.10": 201.7823, "encoder_q-layer.11": 651.4272, "encoder_q-layer.2": 2460.8511, "encoder_q-layer.3": 2329.0679, "encoder_q-layer.4": 2396.5139, "encoder_q-layer.5": 2259.5857, "encoder_q-layer.6": 1171.9282, "encoder_q-layer.7": 734.312, "encoder_q-layer.8": 402.3764, "encoder_q-layer.9": 216.7063, "epoch": 0.11, "inbatch_neg_score": 0.7328, "inbatch_pos_score": 1.2578, "learning_rate": 4.644444444444445e-05, "loss": 4.2724, "norm_diff": 0.0458, "norm_loss": 0.0, "num_token_doc": 66.976, "num_token_overlap": 11.6342, "num_token_query": 31.2551, "num_token_union": 65.2409, "num_word_context": 202.552, "num_word_doc": 49.9953, "num_word_query": 23.245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2786.8991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7314, "query_norm": 1.6412, "queue_k_norm": 1.6842, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2551, "sent_len_1": 66.976, "sent_len_max_0": 127.3312, "sent_len_max_1": 189.3338, "stdk": 0.0467, "stdq": 0.0441, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.2422, "doc_norm": 1.6707, "encoder_q-embeddings": 798.9047, "encoder_q-layer.0": 633.4944, "encoder_q-layer.1": 655.2357, "encoder_q-layer.10": 196.6147, "encoder_q-layer.11": 716.8501, "encoder_q-layer.2": 727.1855, "encoder_q-layer.3": 760.8336, "encoder_q-layer.4": 774.1214, "encoder_q-layer.5": 774.7195, "encoder_q-layer.6": 617.7518, "encoder_q-layer.7": 432.7635, "encoder_q-layer.8": 297.2336, "encoder_q-layer.9": 196.0359, "epoch": 0.11, "inbatch_neg_score": 0.7541, "inbatch_pos_score": 1.2676, "learning_rate": 4.638888888888889e-05, "loss": 4.2422, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.8784, "num_token_overlap": 11.6924, "num_token_query": 31.5004, "num_token_union": 65.2715, "num_word_context": 202.4195, "num_word_doc": 49.895, "num_word_query": 23.3978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 938.9941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7544, "query_norm": 1.6159, "queue_k_norm": 1.6691, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5004, "sent_len_1": 66.8784, "sent_len_max_0": 127.3388, "sent_len_max_1": 190.795, "stdk": 0.0467, "stdq": 0.043, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.2482, "doc_norm": 1.6603, "encoder_q-embeddings": 604.8909, "encoder_q-layer.0": 425.2163, "encoder_q-layer.1": 466.14, "encoder_q-layer.10": 208.6456, "encoder_q-layer.11": 659.5754, "encoder_q-layer.2": 533.4236, "encoder_q-layer.3": 544.7939, "encoder_q-layer.4": 543.345, "encoder_q-layer.5": 557.6983, "encoder_q-layer.6": 447.3969, "encoder_q-layer.7": 297.2392, "encoder_q-layer.8": 230.9496, "encoder_q-layer.9": 189.1839, "epoch": 0.11, "inbatch_neg_score": 0.6781, "inbatch_pos_score": 1.2178, "learning_rate": 4.633333333333333e-05, "loss": 4.2482, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.4879, "num_token_overlap": 11.7453, "num_token_query": 31.4778, "num_token_union": 64.9621, "num_word_context": 201.6125, "num_word_doc": 49.6313, "num_word_query": 23.3741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 706.7742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6797, "query_norm": 1.6191, "queue_k_norm": 1.6527, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4778, "sent_len_1": 66.4879, "sent_len_max_0": 127.47, "sent_len_max_1": 188.405, "stdk": 0.0468, "stdq": 0.044, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.2824, "doc_norm": 1.6368, "encoder_q-embeddings": 1319.692, "encoder_q-layer.0": 1004.852, "encoder_q-layer.1": 1106.1553, "encoder_q-layer.10": 208.2458, "encoder_q-layer.11": 716.65, "encoder_q-layer.2": 1337.1814, "encoder_q-layer.3": 1254.5891, "encoder_q-layer.4": 1127.8882, "encoder_q-layer.5": 1111.2172, "encoder_q-layer.6": 1111.9445, "encoder_q-layer.7": 972.2653, "encoder_q-layer.8": 567.0342, "encoder_q-layer.9": 240.8259, "epoch": 0.11, "inbatch_neg_score": 0.6255, "inbatch_pos_score": 1.1367, "learning_rate": 4.627777777777778e-05, "loss": 4.2824, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.634, "num_token_overlap": 11.6483, "num_token_query": 31.3581, "num_token_union": 65.0441, "num_word_context": 201.9288, "num_word_doc": 49.7194, "num_word_query": 23.2996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1496.4895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.624, "query_norm": 1.6546, "queue_k_norm": 1.6386, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3581, "sent_len_1": 66.634, "sent_len_max_0": 127.2537, "sent_len_max_1": 188.7138, "stdk": 0.0463, "stdq": 0.044, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.2541, "doc_norm": 1.6189, "encoder_q-embeddings": 1163.5148, "encoder_q-layer.0": 852.7537, "encoder_q-layer.1": 903.259, "encoder_q-layer.10": 211.4896, "encoder_q-layer.11": 622.1021, "encoder_q-layer.2": 929.2958, "encoder_q-layer.3": 943.7707, "encoder_q-layer.4": 937.643, "encoder_q-layer.5": 867.1995, "encoder_q-layer.6": 815.792, "encoder_q-layer.7": 637.0395, "encoder_q-layer.8": 433.515, "encoder_q-layer.9": 236.0104, "epoch": 0.11, "inbatch_neg_score": 0.6505, "inbatch_pos_score": 1.1689, "learning_rate": 4.6222222222222224e-05, "loss": 4.2541, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.6645, "num_token_overlap": 11.6493, "num_token_query": 31.2971, "num_token_union": 65.0275, "num_word_context": 202.0989, "num_word_doc": 49.746, "num_word_query": 23.2414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1195.2295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6489, "query_norm": 1.5867, "queue_k_norm": 1.6248, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2971, "sent_len_1": 66.6645, "sent_len_max_0": 127.4925, "sent_len_max_1": 189.315, "stdk": 0.046, "stdq": 0.0431, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.2671, "doc_norm": 1.6085, "encoder_q-embeddings": 263.9723, "encoder_q-layer.0": 186.9646, "encoder_q-layer.1": 211.3142, "encoder_q-layer.10": 208.0445, "encoder_q-layer.11": 689.6232, "encoder_q-layer.2": 229.6469, "encoder_q-layer.3": 244.1386, "encoder_q-layer.4": 239.0633, "encoder_q-layer.5": 230.6783, "encoder_q-layer.6": 251.478, "encoder_q-layer.7": 250.8196, "encoder_q-layer.8": 245.1952, "encoder_q-layer.9": 183.4241, "epoch": 0.11, "inbatch_neg_score": 0.6072, "inbatch_pos_score": 1.0889, "learning_rate": 4.6166666666666666e-05, "loss": 4.2671, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.7975, "num_token_overlap": 11.6964, "num_token_query": 31.4009, "num_token_union": 65.1442, "num_word_context": 202.1595, "num_word_doc": 49.8258, "num_word_query": 23.3218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 440.201, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.603, "query_norm": 1.6126, "queue_k_norm": 1.6127, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4009, "sent_len_1": 66.7975, "sent_len_max_0": 127.545, "sent_len_max_1": 189.4363, "stdk": 0.046, "stdq": 0.0431, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.25, "doc_norm": 1.6045, "encoder_q-embeddings": 1103.1642, "encoder_q-layer.0": 771.8039, "encoder_q-layer.1": 926.328, "encoder_q-layer.10": 223.1965, "encoder_q-layer.11": 611.8348, "encoder_q-layer.2": 955.7695, "encoder_q-layer.3": 1002.2059, "encoder_q-layer.4": 1080.4479, "encoder_q-layer.5": 857.8118, "encoder_q-layer.6": 706.7158, "encoder_q-layer.7": 544.9334, "encoder_q-layer.8": 333.3108, "encoder_q-layer.9": 180.5781, "epoch": 0.11, "inbatch_neg_score": 0.5987, "inbatch_pos_score": 1.1162, "learning_rate": 4.6111111111111115e-05, "loss": 4.25, "norm_diff": 0.0155, "norm_loss": 0.0, "num_token_doc": 66.8147, "num_token_overlap": 11.6527, "num_token_query": 31.345, "num_token_union": 65.0976, "num_word_context": 202.3556, "num_word_doc": 49.8044, "num_word_query": 23.2829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1180.9746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5977, "query_norm": 1.5944, "queue_k_norm": 1.6021, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.345, "sent_len_1": 66.8147, "sent_len_max_0": 127.5288, "sent_len_max_1": 191.5462, "stdk": 0.0464, "stdq": 0.0427, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.2576, "doc_norm": 1.594, "encoder_q-embeddings": 1379.1226, "encoder_q-layer.0": 965.9816, "encoder_q-layer.1": 1101.8104, "encoder_q-layer.10": 223.8643, "encoder_q-layer.11": 643.1541, "encoder_q-layer.2": 1336.0505, "encoder_q-layer.3": 1459.0364, "encoder_q-layer.4": 1539.1394, "encoder_q-layer.5": 1571.4678, "encoder_q-layer.6": 1468.1473, "encoder_q-layer.7": 1283.8665, "encoder_q-layer.8": 985.4331, "encoder_q-layer.9": 391.0526, "epoch": 0.11, "inbatch_neg_score": 0.5706, "inbatch_pos_score": 1.083, "learning_rate": 4.605555555555556e-05, "loss": 4.2576, "norm_diff": 0.0297, "norm_loss": 0.0, "num_token_doc": 66.6852, "num_token_overlap": 11.6174, "num_token_query": 31.3277, "num_token_union": 65.0802, "num_word_context": 202.0666, "num_word_doc": 49.7103, "num_word_query": 23.2664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1727.1639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.6238, "queue_k_norm": 1.5911, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3277, "sent_len_1": 66.6852, "sent_len_max_0": 127.6625, "sent_len_max_1": 192.185, "stdk": 0.0464, "stdq": 0.0436, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.2135, "doc_norm": 1.5794, "encoder_q-embeddings": 3135.8042, "encoder_q-layer.0": 2238.6748, "encoder_q-layer.1": 2596.7671, "encoder_q-layer.10": 397.4492, "encoder_q-layer.11": 1149.2277, "encoder_q-layer.2": 2931.7129, "encoder_q-layer.3": 3145.6921, "encoder_q-layer.4": 3183.7551, "encoder_q-layer.5": 3459.0728, "encoder_q-layer.6": 3232.4441, "encoder_q-layer.7": 3010.6877, "encoder_q-layer.8": 1807.5581, "encoder_q-layer.9": 556.6416, "epoch": 0.11, "inbatch_neg_score": 0.5665, "inbatch_pos_score": 1.084, "learning_rate": 4.600000000000001e-05, "loss": 4.2135, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.86, "num_token_overlap": 11.6836, "num_token_query": 31.4589, "num_token_union": 65.2075, "num_word_context": 202.5438, "num_word_doc": 49.8778, "num_word_query": 23.3513, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3816.4852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5649, "query_norm": 1.6309, "queue_k_norm": 1.5787, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4589, "sent_len_1": 66.86, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.7138, "stdk": 0.0462, "stdq": 0.0433, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.2519, "doc_norm": 1.5686, "encoder_q-embeddings": 1462.3073, "encoder_q-layer.0": 1081.7458, "encoder_q-layer.1": 1182.1586, "encoder_q-layer.10": 204.0024, "encoder_q-layer.11": 548.2299, "encoder_q-layer.2": 1383.7407, "encoder_q-layer.3": 1429.407, "encoder_q-layer.4": 1472.5881, "encoder_q-layer.5": 1597.9558, "encoder_q-layer.6": 1413.0806, "encoder_q-layer.7": 1189.8993, "encoder_q-layer.8": 755.7764, "encoder_q-layer.9": 302.4025, "epoch": 0.11, "inbatch_neg_score": 0.5565, "inbatch_pos_score": 1.0732, "learning_rate": 4.594444444444444e-05, "loss": 4.2519, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.7638, "num_token_overlap": 11.6836, "num_token_query": 31.5012, "num_token_union": 65.1847, "num_word_context": 202.1829, "num_word_doc": 49.7814, "num_word_query": 23.3976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1738.9169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5557, "query_norm": 1.6114, "queue_k_norm": 1.5692, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5012, "sent_len_1": 66.7638, "sent_len_max_0": 127.5413, "sent_len_max_1": 190.2587, "stdk": 0.0461, "stdq": 0.0432, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.2278, "doc_norm": 1.5567, "encoder_q-embeddings": 1138.1283, "encoder_q-layer.0": 812.1297, "encoder_q-layer.1": 834.5698, "encoder_q-layer.10": 223.3985, "encoder_q-layer.11": 617.3204, "encoder_q-layer.2": 939.5809, "encoder_q-layer.3": 973.2219, "encoder_q-layer.4": 967.8185, "encoder_q-layer.5": 955.0558, "encoder_q-layer.6": 854.9283, "encoder_q-layer.7": 753.3679, "encoder_q-layer.8": 579.6629, "encoder_q-layer.9": 264.7686, "epoch": 0.11, "inbatch_neg_score": 0.5313, "inbatch_pos_score": 1.043, "learning_rate": 4.588888888888889e-05, "loss": 4.2278, "norm_diff": 0.079, "norm_loss": 0.0, "num_token_doc": 66.8162, "num_token_overlap": 11.7459, "num_token_query": 31.5469, "num_token_union": 65.2137, "num_word_context": 202.5579, "num_word_doc": 49.8561, "num_word_query": 23.441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1209.1847, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5288, "query_norm": 1.6357, "queue_k_norm": 1.5608, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5469, "sent_len_1": 66.8162, "sent_len_max_0": 127.54, "sent_len_max_1": 190.26, "stdk": 0.0459, "stdq": 0.0423, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.2006, "doc_norm": 1.5507, "encoder_q-embeddings": 1590.0712, "encoder_q-layer.0": 1167.3481, "encoder_q-layer.1": 1133.5717, "encoder_q-layer.10": 217.4686, "encoder_q-layer.11": 624.7719, "encoder_q-layer.2": 1235.4576, "encoder_q-layer.3": 1195.4689, "encoder_q-layer.4": 1153.1351, "encoder_q-layer.5": 1045.4783, "encoder_q-layer.6": 920.6088, "encoder_q-layer.7": 970.742, "encoder_q-layer.8": 610.5477, "encoder_q-layer.9": 253.4635, "epoch": 0.11, "inbatch_neg_score": 0.5177, "inbatch_pos_score": 1.0342, "learning_rate": 4.5833333333333334e-05, "loss": 4.2006, "norm_diff": 0.1484, "norm_loss": 0.0, "num_token_doc": 66.8827, "num_token_overlap": 11.6989, "num_token_query": 31.4907, "num_token_union": 65.2407, "num_word_context": 202.3802, "num_word_doc": 49.9176, "num_word_query": 23.3892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1569.7076, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5151, "query_norm": 1.699, "queue_k_norm": 1.5495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4907, "sent_len_1": 66.8827, "sent_len_max_0": 127.385, "sent_len_max_1": 188.1513, "stdk": 0.046, "stdq": 0.0436, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2307, "doc_norm": 1.5438, "encoder_q-embeddings": 1079.1548, "encoder_q-layer.0": 749.9993, "encoder_q-layer.1": 839.1787, "encoder_q-layer.10": 195.3532, "encoder_q-layer.11": 616.3135, "encoder_q-layer.2": 974.592, "encoder_q-layer.3": 1055.0204, "encoder_q-layer.4": 1021.3931, "encoder_q-layer.5": 1071.1117, "encoder_q-layer.6": 1058.7191, "encoder_q-layer.7": 1006.5914, "encoder_q-layer.8": 584.1479, "encoder_q-layer.9": 219.3909, "epoch": 0.11, "inbatch_neg_score": 0.5463, "inbatch_pos_score": 1.0781, "learning_rate": 4.577777777777778e-05, "loss": 4.2307, "norm_diff": 0.1535, "norm_loss": 0.0, "num_token_doc": 66.8044, "num_token_overlap": 11.6077, "num_token_query": 31.1508, "num_token_union": 65.0283, "num_word_context": 202.2022, "num_word_doc": 49.862, "num_word_query": 23.1128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1301.8984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5425, "query_norm": 1.6973, "queue_k_norm": 1.5409, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1508, "sent_len_1": 66.8044, "sent_len_max_0": 127.2613, "sent_len_max_1": 190.72, "stdk": 0.0461, "stdq": 0.0442, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.2054, "doc_norm": 1.5285, "encoder_q-embeddings": 1953.5032, "encoder_q-layer.0": 1535.7324, "encoder_q-layer.1": 1587.6436, "encoder_q-layer.10": 224.5137, "encoder_q-layer.11": 579.0494, "encoder_q-layer.2": 1330.9165, "encoder_q-layer.3": 1282.3044, "encoder_q-layer.4": 1334.4121, "encoder_q-layer.5": 1236.0892, "encoder_q-layer.6": 1048.203, "encoder_q-layer.7": 786.4402, "encoder_q-layer.8": 425.8942, "encoder_q-layer.9": 211.4785, "epoch": 0.12, "inbatch_neg_score": 0.5232, "inbatch_pos_score": 1.0332, "learning_rate": 4.572222222222222e-05, "loss": 4.2054, "norm_diff": 0.1612, "norm_loss": 0.0, "num_token_doc": 66.8889, "num_token_overlap": 11.6665, "num_token_query": 31.4584, "num_token_union": 65.2664, "num_word_context": 202.6066, "num_word_doc": 49.9449, "num_word_query": 23.375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1806.1484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.52, "query_norm": 1.6897, "queue_k_norm": 1.5334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4584, "sent_len_1": 66.8889, "sent_len_max_0": 127.4762, "sent_len_max_1": 188.6625, "stdk": 0.0457, "stdq": 0.0435, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1848, "doc_norm": 1.5294, "encoder_q-embeddings": 1319.1027, "encoder_q-layer.0": 935.7455, "encoder_q-layer.1": 1043.6331, "encoder_q-layer.10": 186.8292, "encoder_q-layer.11": 607.4414, "encoder_q-layer.2": 1244.5939, "encoder_q-layer.3": 1310.6504, "encoder_q-layer.4": 1378.4271, "encoder_q-layer.5": 1231.0133, "encoder_q-layer.6": 1199.5972, "encoder_q-layer.7": 1084.156, "encoder_q-layer.8": 665.7877, "encoder_q-layer.9": 205.2007, "epoch": 0.12, "inbatch_neg_score": 0.5096, "inbatch_pos_score": 1.0195, "learning_rate": 4.566666666666667e-05, "loss": 4.1848, "norm_diff": 0.2217, "norm_loss": 0.0, "num_token_doc": 66.9736, "num_token_overlap": 11.6836, "num_token_query": 31.4036, "num_token_union": 65.2327, "num_word_context": 202.305, "num_word_doc": 49.9397, "num_word_query": 23.3388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1548.0754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5059, "query_norm": 1.7512, "queue_k_norm": 1.5269, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4036, "sent_len_1": 66.9736, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.5563, "stdk": 0.046, "stdq": 0.0427, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.2139, "doc_norm": 1.5185, "encoder_q-embeddings": 2163.0034, "encoder_q-layer.0": 1624.0087, "encoder_q-layer.1": 1708.066, "encoder_q-layer.10": 208.0486, "encoder_q-layer.11": 562.8015, "encoder_q-layer.2": 2098.0181, "encoder_q-layer.3": 2319.627, "encoder_q-layer.4": 2587.8538, "encoder_q-layer.5": 2450.9497, "encoder_q-layer.6": 2459.5215, "encoder_q-layer.7": 2618.1814, "encoder_q-layer.8": 2057.5352, "encoder_q-layer.9": 633.7413, "epoch": 0.12, "inbatch_neg_score": 0.5153, "inbatch_pos_score": 1.0234, "learning_rate": 4.561111111111112e-05, "loss": 4.2139, "norm_diff": 0.2647, "norm_loss": 0.0, "num_token_doc": 66.6562, "num_token_overlap": 11.6345, "num_token_query": 31.2704, "num_token_union": 65.0178, "num_word_context": 202.2443, "num_word_doc": 49.7139, "num_word_query": 23.2385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2929.3623, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5112, "query_norm": 1.7832, "queue_k_norm": 1.5231, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2704, "sent_len_1": 66.6562, "sent_len_max_0": 127.4813, "sent_len_max_1": 190.1475, "stdk": 0.0458, "stdq": 0.0431, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.191, "doc_norm": 1.5218, "encoder_q-embeddings": 583.9647, "encoder_q-layer.0": 418.1006, "encoder_q-layer.1": 484.5695, "encoder_q-layer.10": 187.8626, "encoder_q-layer.11": 513.1733, "encoder_q-layer.2": 561.9665, "encoder_q-layer.3": 633.9935, "encoder_q-layer.4": 661.8228, "encoder_q-layer.5": 658.3102, "encoder_q-layer.6": 631.0103, "encoder_q-layer.7": 583.9705, "encoder_q-layer.8": 323.8501, "encoder_q-layer.9": 182.183, "epoch": 0.12, "inbatch_neg_score": 0.5474, "inbatch_pos_score": 1.0771, "learning_rate": 4.555555555555556e-05, "loss": 4.191, "norm_diff": 0.2921, "norm_loss": 0.0, "num_token_doc": 66.8174, "num_token_overlap": 11.6662, "num_token_query": 31.4644, "num_token_union": 65.2192, "num_word_context": 202.1461, "num_word_doc": 49.8945, "num_word_query": 23.3773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 767.8677, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5425, "query_norm": 1.8139, "queue_k_norm": 1.5203, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4644, "sent_len_1": 66.8174, "sent_len_max_0": 127.4737, "sent_len_max_1": 189.6325, "stdk": 0.0461, "stdq": 0.0431, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.1832, "doc_norm": 1.5125, "encoder_q-embeddings": 567.1159, "encoder_q-layer.0": 408.3996, "encoder_q-layer.1": 450.0208, "encoder_q-layer.10": 186.0946, "encoder_q-layer.11": 574.9482, "encoder_q-layer.2": 527.7112, "encoder_q-layer.3": 612.2739, "encoder_q-layer.4": 702.9437, "encoder_q-layer.5": 613.3984, "encoder_q-layer.6": 714.491, "encoder_q-layer.7": 691.6038, "encoder_q-layer.8": 580.0408, "encoder_q-layer.9": 312.4978, "epoch": 0.12, "inbatch_neg_score": 0.5337, "inbatch_pos_score": 1.0596, "learning_rate": 4.55e-05, "loss": 4.1832, "norm_diff": 0.3106, "norm_loss": 0.0, "num_token_doc": 66.7244, "num_token_overlap": 11.7024, "num_token_query": 31.3858, "num_token_union": 65.0825, "num_word_context": 202.3736, "num_word_doc": 49.7634, "num_word_query": 23.2951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 812.4907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.8231, "queue_k_norm": 1.518, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3858, "sent_len_1": 66.7244, "sent_len_max_0": 127.6363, "sent_len_max_1": 190.7237, "stdk": 0.0459, "stdq": 0.043, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1706, "doc_norm": 1.5158, "encoder_q-embeddings": 5701.2808, "encoder_q-layer.0": 4251.3379, "encoder_q-layer.1": 4438.707, "encoder_q-layer.10": 225.6412, "encoder_q-layer.11": 484.337, "encoder_q-layer.2": 5070.498, "encoder_q-layer.3": 5316.3154, "encoder_q-layer.4": 5177.5063, "encoder_q-layer.5": 5324.002, "encoder_q-layer.6": 4800.3506, "encoder_q-layer.7": 4599.7764, "encoder_q-layer.8": 3698.5168, "encoder_q-layer.9": 1309.8533, "epoch": 0.12, "inbatch_neg_score": 0.5717, "inbatch_pos_score": 1.0928, "learning_rate": 4.5444444444444444e-05, "loss": 4.1706, "norm_diff": 0.2683, "norm_loss": 0.0, "num_token_doc": 66.7099, "num_token_overlap": 11.6704, "num_token_query": 31.2929, "num_token_union": 65.0121, "num_word_context": 201.9004, "num_word_doc": 49.8066, "num_word_query": 23.2607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6630.2693, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5669, "query_norm": 1.7841, "queue_k_norm": 1.516, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2929, "sent_len_1": 66.7099, "sent_len_max_0": 127.5113, "sent_len_max_1": 187.8438, "stdk": 0.0461, "stdq": 0.0425, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.1817, "doc_norm": 1.5126, "encoder_q-embeddings": 873.8243, "encoder_q-layer.0": 615.8827, "encoder_q-layer.1": 653.2061, "encoder_q-layer.10": 184.8101, "encoder_q-layer.11": 521.8152, "encoder_q-layer.2": 730.134, "encoder_q-layer.3": 763.1146, "encoder_q-layer.4": 843.5138, "encoder_q-layer.5": 804.5769, "encoder_q-layer.6": 838.3747, "encoder_q-layer.7": 819.2819, "encoder_q-layer.8": 547.2448, "encoder_q-layer.9": 227.9767, "epoch": 0.12, "inbatch_neg_score": 0.6234, "inbatch_pos_score": 1.1416, "learning_rate": 4.538888888888889e-05, "loss": 4.1817, "norm_diff": 0.3003, "norm_loss": 0.0, "num_token_doc": 66.5902, "num_token_overlap": 11.6558, "num_token_query": 31.3288, "num_token_union": 65.0196, "num_word_context": 202.4449, "num_word_doc": 49.7397, "num_word_query": 23.2814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1023.3112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6172, "query_norm": 1.8129, "queue_k_norm": 1.5166, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3288, "sent_len_1": 66.5902, "sent_len_max_0": 127.4638, "sent_len_max_1": 186.01, "stdk": 0.046, "stdq": 0.0423, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.189, "doc_norm": 1.5191, "encoder_q-embeddings": 1168.3657, "encoder_q-layer.0": 810.6863, "encoder_q-layer.1": 912.689, "encoder_q-layer.10": 190.0394, "encoder_q-layer.11": 538.724, "encoder_q-layer.2": 1123.2767, "encoder_q-layer.3": 1112.7416, "encoder_q-layer.4": 1136.2906, "encoder_q-layer.5": 1044.5286, "encoder_q-layer.6": 1005.5577, "encoder_q-layer.7": 791.9281, "encoder_q-layer.8": 377.3513, "encoder_q-layer.9": 192.8172, "epoch": 0.12, "inbatch_neg_score": 0.5959, "inbatch_pos_score": 1.1191, "learning_rate": 4.5333333333333335e-05, "loss": 4.189, "norm_diff": 0.2672, "norm_loss": 0.0, "num_token_doc": 66.7095, "num_token_overlap": 11.6954, "num_token_query": 31.3062, "num_token_union": 64.9956, "num_word_context": 202.2267, "num_word_doc": 49.7559, "num_word_query": 23.2301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1314.8605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5928, "query_norm": 1.7863, "queue_k_norm": 1.5158, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3062, "sent_len_1": 66.7095, "sent_len_max_0": 127.42, "sent_len_max_1": 190.4938, "stdk": 0.0463, "stdq": 0.0428, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1495, "doc_norm": 1.5183, "encoder_q-embeddings": 1486.2554, "encoder_q-layer.0": 985.8243, "encoder_q-layer.1": 1212.4496, "encoder_q-layer.10": 220.2192, "encoder_q-layer.11": 542.0, "encoder_q-layer.2": 1099.9297, "encoder_q-layer.3": 1032.0298, "encoder_q-layer.4": 978.4272, "encoder_q-layer.5": 816.8214, "encoder_q-layer.6": 643.3143, "encoder_q-layer.7": 547.6419, "encoder_q-layer.8": 322.3432, "encoder_q-layer.9": 187.032, "epoch": 0.12, "inbatch_neg_score": 0.5825, "inbatch_pos_score": 1.1025, "learning_rate": 4.527777777777778e-05, "loss": 4.1495, "norm_diff": 0.1844, "norm_loss": 0.0, "num_token_doc": 66.976, "num_token_overlap": 11.6999, "num_token_query": 31.4433, "num_token_union": 65.2667, "num_word_context": 202.6252, "num_word_doc": 49.9737, "num_word_query": 23.3587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1344.9822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5786, "query_norm": 1.7027, "queue_k_norm": 1.5168, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4433, "sent_len_1": 66.976, "sent_len_max_0": 127.4725, "sent_len_max_1": 191.0387, "stdk": 0.0463, "stdq": 0.0422, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.2165, "doc_norm": 1.5181, "encoder_q-embeddings": 693.7163, "encoder_q-layer.0": 498.7426, "encoder_q-layer.1": 573.3997, "encoder_q-layer.10": 179.8078, "encoder_q-layer.11": 493.9809, "encoder_q-layer.2": 615.0004, "encoder_q-layer.3": 639.9226, "encoder_q-layer.4": 684.5294, "encoder_q-layer.5": 676.4491, "encoder_q-layer.6": 709.2899, "encoder_q-layer.7": 663.871, "encoder_q-layer.8": 412.4324, "encoder_q-layer.9": 170.2699, "epoch": 0.12, "inbatch_neg_score": 0.591, "inbatch_pos_score": 1.0996, "learning_rate": 4.522222222222223e-05, "loss": 4.2165, "norm_diff": 0.166, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 11.7045, "num_token_query": 31.5221, "num_token_union": 65.1895, "num_word_context": 202.3437, "num_word_doc": 49.8715, "num_word_query": 23.4325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 842.695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5869, "query_norm": 1.6841, "queue_k_norm": 1.5142, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5221, "sent_len_1": 66.7857, "sent_len_max_0": 127.6412, "sent_len_max_1": 189.2925, "stdk": 0.0463, "stdq": 0.0421, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.1889, "doc_norm": 1.5107, "encoder_q-embeddings": 846.6622, "encoder_q-layer.0": 617.197, "encoder_q-layer.1": 712.4802, "encoder_q-layer.10": 179.6455, "encoder_q-layer.11": 504.6741, "encoder_q-layer.2": 888.3928, "encoder_q-layer.3": 897.4863, "encoder_q-layer.4": 988.4854, "encoder_q-layer.5": 972.5625, "encoder_q-layer.6": 801.8391, "encoder_q-layer.7": 632.6292, "encoder_q-layer.8": 342.9521, "encoder_q-layer.9": 180.9817, "epoch": 0.12, "inbatch_neg_score": 0.5316, "inbatch_pos_score": 1.0498, "learning_rate": 4.516666666666667e-05, "loss": 4.1889, "norm_diff": 0.1271, "norm_loss": 0.0, "num_token_doc": 66.8217, "num_token_overlap": 11.752, "num_token_query": 31.5328, "num_token_union": 65.1826, "num_word_context": 202.5614, "num_word_doc": 49.8459, "num_word_query": 23.4088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1053.2631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5269, "query_norm": 1.6378, "queue_k_norm": 1.5145, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5328, "sent_len_1": 66.8217, "sent_len_max_0": 127.4425, "sent_len_max_1": 188.0387, "stdk": 0.0461, "stdq": 0.0424, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.2228, "doc_norm": 1.5186, "encoder_q-embeddings": 3062.8003, "encoder_q-layer.0": 2253.9382, "encoder_q-layer.1": 2523.0254, "encoder_q-layer.10": 189.6774, "encoder_q-layer.11": 527.7679, "encoder_q-layer.2": 2838.728, "encoder_q-layer.3": 2997.71, "encoder_q-layer.4": 2589.4966, "encoder_q-layer.5": 1558.5857, "encoder_q-layer.6": 1136.1265, "encoder_q-layer.7": 974.2705, "encoder_q-layer.8": 565.9884, "encoder_q-layer.9": 231.9332, "epoch": 0.12, "inbatch_neg_score": 0.5538, "inbatch_pos_score": 1.0615, "learning_rate": 4.511111111111112e-05, "loss": 4.2228, "norm_diff": 0.1189, "norm_loss": 0.0, "num_token_doc": 66.7614, "num_token_overlap": 11.6827, "num_token_query": 31.4361, "num_token_union": 65.1235, "num_word_context": 202.5396, "num_word_doc": 49.8432, "num_word_query": 23.3427, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3004.7509, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5503, "query_norm": 1.6375, "queue_k_norm": 1.5104, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4361, "sent_len_1": 66.7614, "sent_len_max_0": 127.3088, "sent_len_max_1": 189.565, "stdk": 0.0465, "stdq": 0.0414, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2114, "doc_norm": 1.5085, "encoder_q-embeddings": 442.6506, "encoder_q-layer.0": 323.9316, "encoder_q-layer.1": 372.0541, "encoder_q-layer.10": 208.7154, "encoder_q-layer.11": 530.1886, "encoder_q-layer.2": 421.3307, "encoder_q-layer.3": 425.3022, "encoder_q-layer.4": 415.1929, "encoder_q-layer.5": 359.6095, "encoder_q-layer.6": 344.9198, "encoder_q-layer.7": 302.8657, "encoder_q-layer.8": 239.2857, "encoder_q-layer.9": 182.6664, "epoch": 0.12, "inbatch_neg_score": 0.4537, "inbatch_pos_score": 0.9707, "learning_rate": 4.5055555555555554e-05, "loss": 4.2114, "norm_diff": 0.0989, "norm_loss": 0.0, "num_token_doc": 66.9345, "num_token_overlap": 11.7105, "num_token_query": 31.3899, "num_token_union": 65.1607, "num_word_context": 202.26, "num_word_doc": 49.9863, "num_word_query": 23.3171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 544.3561, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4509, "query_norm": 1.6074, "queue_k_norm": 1.507, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3899, "sent_len_1": 66.9345, "sent_len_max_0": 127.5487, "sent_len_max_1": 188.7512, "stdk": 0.0462, "stdq": 0.0432, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.2266, "doc_norm": 1.5064, "encoder_q-embeddings": 1588.5853, "encoder_q-layer.0": 1070.1868, "encoder_q-layer.1": 1198.4673, "encoder_q-layer.10": 172.8003, "encoder_q-layer.11": 479.9583, "encoder_q-layer.2": 1368.412, "encoder_q-layer.3": 1420.8168, "encoder_q-layer.4": 1262.0771, "encoder_q-layer.5": 1034.6265, "encoder_q-layer.6": 888.1961, "encoder_q-layer.7": 629.4075, "encoder_q-layer.8": 333.7348, "encoder_q-layer.9": 189.2795, "epoch": 0.12, "inbatch_neg_score": 0.4561, "inbatch_pos_score": 0.9673, "learning_rate": 4.5e-05, "loss": 4.2266, "norm_diff": 0.0533, "norm_loss": 0.0, "num_token_doc": 66.583, "num_token_overlap": 11.6651, "num_token_query": 31.4109, "num_token_union": 65.0336, "num_word_context": 202.1417, "num_word_doc": 49.6736, "num_word_query": 23.343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1551.5142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4541, "query_norm": 1.5597, "queue_k_norm": 1.5028, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4109, "sent_len_1": 66.583, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.6763, "stdk": 0.0463, "stdq": 0.0421, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2022, "doc_norm": 1.4988, "encoder_q-embeddings": 1210.4263, "encoder_q-layer.0": 833.5411, "encoder_q-layer.1": 886.3105, "encoder_q-layer.10": 187.1228, "encoder_q-layer.11": 486.7809, "encoder_q-layer.2": 1083.502, "encoder_q-layer.3": 1095.576, "encoder_q-layer.4": 1130.7035, "encoder_q-layer.5": 1102.9476, "encoder_q-layer.6": 960.9758, "encoder_q-layer.7": 848.2393, "encoder_q-layer.8": 478.2394, "encoder_q-layer.9": 238.9915, "epoch": 0.12, "inbatch_neg_score": 0.4065, "inbatch_pos_score": 0.9355, "learning_rate": 4.4944444444444445e-05, "loss": 4.2022, "norm_diff": 0.0298, "norm_loss": 0.0, "num_token_doc": 66.6653, "num_token_overlap": 11.7214, "num_token_query": 31.535, "num_token_union": 65.1183, "num_word_context": 202.2318, "num_word_doc": 49.7331, "num_word_query": 23.4147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1337.2875, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4055, "query_norm": 1.5286, "queue_k_norm": 1.4979, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.535, "sent_len_1": 66.6653, "sent_len_max_0": 127.5325, "sent_len_max_1": 189.4512, "stdk": 0.0463, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.222, "doc_norm": 1.4974, "encoder_q-embeddings": 4637.2793, "encoder_q-layer.0": 3497.3701, "encoder_q-layer.1": 3374.4949, "encoder_q-layer.10": 194.0521, "encoder_q-layer.11": 501.4896, "encoder_q-layer.2": 3465.717, "encoder_q-layer.3": 4276.1113, "encoder_q-layer.4": 3856.0366, "encoder_q-layer.5": 4200.6733, "encoder_q-layer.6": 4908.7886, "encoder_q-layer.7": 5773.3892, "encoder_q-layer.8": 3039.2476, "encoder_q-layer.9": 476.0675, "epoch": 0.12, "inbatch_neg_score": 0.4328, "inbatch_pos_score": 0.9746, "learning_rate": 4.4888888888888894e-05, "loss": 4.222, "norm_diff": 0.0597, "norm_loss": 0.0, "num_token_doc": 66.91, "num_token_overlap": 11.6953, "num_token_query": 31.4265, "num_token_union": 65.212, "num_word_context": 202.4638, "num_word_doc": 49.9307, "num_word_query": 23.3504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5803.5065, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4314, "query_norm": 1.5571, "queue_k_norm": 1.4926, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4265, "sent_len_1": 66.91, "sent_len_max_0": 127.4262, "sent_len_max_1": 189.6275, "stdk": 0.0464, "stdq": 0.0434, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.2174, "doc_norm": 1.4853, "encoder_q-embeddings": 1736.0095, "encoder_q-layer.0": 1156.3632, "encoder_q-layer.1": 1279.9769, "encoder_q-layer.10": 434.6198, "encoder_q-layer.11": 1092.215, "encoder_q-layer.2": 1487.0499, "encoder_q-layer.3": 1471.1372, "encoder_q-layer.4": 1373.5818, "encoder_q-layer.5": 1086.0659, "encoder_q-layer.6": 847.9874, "encoder_q-layer.7": 751.4737, "encoder_q-layer.8": 557.5714, "encoder_q-layer.9": 416.4528, "epoch": 0.13, "inbatch_neg_score": 0.4302, "inbatch_pos_score": 0.9517, "learning_rate": 4.483333333333333e-05, "loss": 4.2174, "norm_diff": 0.0467, "norm_loss": 0.0, "num_token_doc": 66.9617, "num_token_overlap": 11.7058, "num_token_query": 31.4264, "num_token_union": 65.2639, "num_word_context": 202.3831, "num_word_doc": 49.9281, "num_word_query": 23.3269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1729.2158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.429, "query_norm": 1.532, "queue_k_norm": 1.487, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4264, "sent_len_1": 66.9617, "sent_len_max_0": 127.5512, "sent_len_max_1": 190.7512, "stdk": 0.0462, "stdq": 0.0432, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.2256, "doc_norm": 1.4809, "encoder_q-embeddings": 1206.5399, "encoder_q-layer.0": 878.4437, "encoder_q-layer.1": 946.3909, "encoder_q-layer.10": 370.4468, "encoder_q-layer.11": 949.5233, "encoder_q-layer.2": 1130.5598, "encoder_q-layer.3": 1180.5023, "encoder_q-layer.4": 1115.5627, "encoder_q-layer.5": 993.0885, "encoder_q-layer.6": 959.1063, "encoder_q-layer.7": 850.5317, "encoder_q-layer.8": 635.9423, "encoder_q-layer.9": 413.9331, "epoch": 0.13, "inbatch_neg_score": 0.385, "inbatch_pos_score": 0.9131, "learning_rate": 4.477777777777778e-05, "loss": 4.2256, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.8286, "num_token_overlap": 11.6244, "num_token_query": 31.1859, "num_token_union": 65.0942, "num_word_context": 202.138, "num_word_doc": 49.89, "num_word_query": 23.1545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1409.1078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3848, "query_norm": 1.4794, "queue_k_norm": 1.4797, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.1859, "sent_len_1": 66.8286, "sent_len_max_0": 127.3425, "sent_len_max_1": 188.9512, "stdk": 0.0462, "stdq": 0.0431, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.2237, "doc_norm": 1.4719, "encoder_q-embeddings": 2811.6284, "encoder_q-layer.0": 2067.3689, "encoder_q-layer.1": 2078.9785, "encoder_q-layer.10": 384.3016, "encoder_q-layer.11": 1076.8943, "encoder_q-layer.2": 2002.5264, "encoder_q-layer.3": 1910.1365, "encoder_q-layer.4": 1896.3286, "encoder_q-layer.5": 1774.3256, "encoder_q-layer.6": 2002.2542, "encoder_q-layer.7": 1791.3385, "encoder_q-layer.8": 1040.203, "encoder_q-layer.9": 407.3568, "epoch": 0.13, "inbatch_neg_score": 0.4401, "inbatch_pos_score": 0.9644, "learning_rate": 4.472222222222223e-05, "loss": 4.2237, "norm_diff": 0.0366, "norm_loss": 0.0, "num_token_doc": 66.7808, "num_token_overlap": 11.7129, "num_token_query": 31.4488, "num_token_union": 65.0988, "num_word_context": 202.1852, "num_word_doc": 49.8092, "num_word_query": 23.3705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2735.0496, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.439, "query_norm": 1.5085, "queue_k_norm": 1.47, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4488, "sent_len_1": 66.7808, "sent_len_max_0": 127.4938, "sent_len_max_1": 191.4737, "stdk": 0.0461, "stdq": 0.0428, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.2259, "doc_norm": 1.4707, "encoder_q-embeddings": 3208.8862, "encoder_q-layer.0": 2410.6384, "encoder_q-layer.1": 2623.8384, "encoder_q-layer.10": 436.0554, "encoder_q-layer.11": 1145.3723, "encoder_q-layer.2": 2955.2451, "encoder_q-layer.3": 2950.636, "encoder_q-layer.4": 2975.0042, "encoder_q-layer.5": 2693.2927, "encoder_q-layer.6": 2595.5972, "encoder_q-layer.7": 2380.3044, "encoder_q-layer.8": 2362.1267, "encoder_q-layer.9": 1298.8673, "epoch": 0.13, "inbatch_neg_score": 0.4374, "inbatch_pos_score": 0.9546, "learning_rate": 4.466666666666667e-05, "loss": 4.2259, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 66.7932, "num_token_overlap": 11.6599, "num_token_query": 31.3554, "num_token_union": 65.1343, "num_word_context": 202.4813, "num_word_doc": 49.8341, "num_word_query": 23.2982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3713.1429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4375, "query_norm": 1.4991, "queue_k_norm": 1.4659, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3554, "sent_len_1": 66.7932, "sent_len_max_0": 127.3213, "sent_len_max_1": 189.88, "stdk": 0.0463, "stdq": 0.0423, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.2234, "doc_norm": 1.4641, "encoder_q-embeddings": 1153.9266, "encoder_q-layer.0": 903.4993, "encoder_q-layer.1": 919.1016, "encoder_q-layer.10": 364.2091, "encoder_q-layer.11": 946.3947, "encoder_q-layer.2": 1058.1589, "encoder_q-layer.3": 1103.3025, "encoder_q-layer.4": 1155.2592, "encoder_q-layer.5": 995.8862, "encoder_q-layer.6": 695.0704, "encoder_q-layer.7": 572.7923, "encoder_q-layer.8": 469.0826, "encoder_q-layer.9": 334.8839, "epoch": 0.13, "inbatch_neg_score": 0.3607, "inbatch_pos_score": 0.897, "learning_rate": 4.461111111111111e-05, "loss": 4.2234, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.6617, "num_token_overlap": 11.6947, "num_token_query": 31.3504, "num_token_union": 65.0105, "num_word_context": 201.9255, "num_word_doc": 49.7235, "num_word_query": 23.2928, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1320.7904, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3594, "query_norm": 1.4543, "queue_k_norm": 1.4601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3504, "sent_len_1": 66.6617, "sent_len_max_0": 127.4188, "sent_len_max_1": 190.3363, "stdk": 0.0462, "stdq": 0.0429, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.2544, "doc_norm": 1.4493, "encoder_q-embeddings": 712.7627, "encoder_q-layer.0": 496.8781, "encoder_q-layer.1": 581.6136, "encoder_q-layer.10": 433.7728, "encoder_q-layer.11": 1040.1392, "encoder_q-layer.2": 620.8273, "encoder_q-layer.3": 678.4559, "encoder_q-layer.4": 644.7992, "encoder_q-layer.5": 623.5917, "encoder_q-layer.6": 584.6396, "encoder_q-layer.7": 633.9026, "encoder_q-layer.8": 619.9691, "encoder_q-layer.9": 435.4828, "epoch": 0.13, "inbatch_neg_score": 0.3051, "inbatch_pos_score": 0.8301, "learning_rate": 4.4555555555555555e-05, "loss": 4.2544, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.741, "num_token_overlap": 11.624, "num_token_query": 31.2947, "num_token_union": 65.1076, "num_word_context": 201.9256, "num_word_doc": 49.7912, "num_word_query": 23.2176, "postclip_grad_norm": 1.0, "preclip_grad_norm": 960.0121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3035, "query_norm": 1.4402, "queue_k_norm": 1.4529, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2947, "sent_len_1": 66.741, "sent_len_max_0": 127.6612, "sent_len_max_1": 188.3388, "stdk": 0.0458, "stdq": 0.0433, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.2291, "doc_norm": 1.4461, "encoder_q-embeddings": 741.4427, "encoder_q-layer.0": 531.7638, "encoder_q-layer.1": 586.1579, "encoder_q-layer.10": 385.3908, "encoder_q-layer.11": 1003.6327, "encoder_q-layer.2": 611.436, "encoder_q-layer.3": 601.6607, "encoder_q-layer.4": 554.4539, "encoder_q-layer.5": 522.4694, "encoder_q-layer.6": 472.3627, "encoder_q-layer.7": 495.9772, "encoder_q-layer.8": 478.4273, "encoder_q-layer.9": 357.7665, "epoch": 0.13, "inbatch_neg_score": 0.3594, "inbatch_pos_score": 0.8696, "learning_rate": 4.4500000000000004e-05, "loss": 4.2291, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.8409, "num_token_overlap": 11.6992, "num_token_query": 31.4339, "num_token_union": 65.1686, "num_word_context": 202.2573, "num_word_doc": 49.8308, "num_word_query": 23.3453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 916.2651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3582, "query_norm": 1.4198, "queue_k_norm": 1.4449, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4339, "sent_len_1": 66.8409, "sent_len_max_0": 127.6275, "sent_len_max_1": 192.4062, "stdk": 0.0459, "stdq": 0.0421, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.2245, "doc_norm": 1.4397, "encoder_q-embeddings": 2379.1121, "encoder_q-layer.0": 1514.1439, "encoder_q-layer.1": 1733.6443, "encoder_q-layer.10": 402.7013, "encoder_q-layer.11": 953.6111, "encoder_q-layer.2": 1910.2061, "encoder_q-layer.3": 1652.7419, "encoder_q-layer.4": 1512.4731, "encoder_q-layer.5": 992.4806, "encoder_q-layer.6": 934.0639, "encoder_q-layer.7": 796.419, "encoder_q-layer.8": 607.4616, "encoder_q-layer.9": 447.2303, "epoch": 0.13, "inbatch_neg_score": 0.3294, "inbatch_pos_score": 0.8257, "learning_rate": 4.4444444444444447e-05, "loss": 4.2245, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.8554, "num_token_overlap": 11.6921, "num_token_query": 31.3667, "num_token_union": 65.1218, "num_word_context": 202.4163, "num_word_doc": 49.8757, "num_word_query": 23.3084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2177.8315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3301, "query_norm": 1.4144, "queue_k_norm": 1.4394, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3667, "sent_len_1": 66.8554, "sent_len_max_0": 127.54, "sent_len_max_1": 190.3388, "stdk": 0.0459, "stdq": 0.0421, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 30.1641, "dev_samples_per_second": 2.122, "dev_steps_per_second": 0.033, "epoch": 0.13, "step": 20000, "test_accuracy": 90.6982421875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.55184406042099, "test_doc_norm": 1.3862961530685425, "test_inbatch_neg_score": 0.5824059247970581, "test_inbatch_pos_score": 1.3332602977752686, "test_loss": 0.55184406042099, "test_loss_align": 1.3353259563446045, "test_loss_unif": 3.8174610137939453, "test_loss_unif_q@queue": 3.8174610137939453, "test_norm_diff": 0.048930779099464417, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2881104052066803, "test_query_norm": 1.4352269172668457, "test_queue_k_norm": 1.4392898082733154, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0385286808013916, "test_stdq": 0.03856799751520157, "test_stdqueue_k": 0.045962415635585785, "test_stdqueue_q": 0.0 }, { "dev_runtime": 30.1641, "dev_samples_per_second": 2.122, "dev_steps_per_second": 0.033, "epoch": 0.13, "eval_beir-arguana_ndcg@10": 0.28166, "eval_beir-arguana_recall@10": 0.50284, "eval_beir-arguana_recall@100": 0.83642, "eval_beir-arguana_recall@20": 0.6394, "eval_beir-avg_ndcg@10": 0.31719766666666666, "eval_beir-avg_recall@10": 0.3819735, "eval_beir-avg_recall@100": 0.5640592499999999, "eval_beir-avg_recall@20": 0.4416855833333334, "eval_beir-cqadupstack_ndcg@10": 0.1846066666666667, "eval_beir-cqadupstack_recall@10": 0.263945, "eval_beir-cqadupstack_recall@100": 0.4835225, "eval_beir-cqadupstack_recall@20": 0.32376583333333336, "eval_beir-fiqa_ndcg@10": 0.17395, "eval_beir-fiqa_recall@10": 0.23035, "eval_beir-fiqa_recall@100": 0.48429, "eval_beir-fiqa_recall@20": 0.29366, "eval_beir-nfcorpus_ndcg@10": 0.2484, "eval_beir-nfcorpus_recall@10": 0.12542, "eval_beir-nfcorpus_recall@100": 0.24117, "eval_beir-nfcorpus_recall@20": 0.15116, "eval_beir-nq_ndcg@10": 0.20212, "eval_beir-nq_recall@10": 0.33879, "eval_beir-nq_recall@100": 0.67357, "eval_beir-nq_recall@20": 0.45331, "eval_beir-quora_ndcg@10": 0.69412, "eval_beir-quora_recall@10": 0.81963, "eval_beir-quora_recall@100": 0.95604, "eval_beir-quora_recall@20": 0.87429, "eval_beir-scidocs_ndcg@10": 0.12514, "eval_beir-scidocs_recall@10": 0.13332, "eval_beir-scidocs_recall@100": 0.3215, "eval_beir-scidocs_recall@20": 0.17987, "eval_beir-scifact_ndcg@10": 0.5854, "eval_beir-scifact_recall@10": 0.73344, "eval_beir-scifact_recall@100": 0.88322, "eval_beir-scifact_recall@20": 0.80411, "eval_beir-trec-covid_ndcg@10": 0.5048, "eval_beir-trec-covid_recall@10": 0.558, "eval_beir-trec-covid_recall@100": 0.3692, "eval_beir-trec-covid_recall@20": 0.521, "eval_beir-webis-touche2020_ndcg@10": 0.17178, "eval_beir-webis-touche2020_recall@10": 0.114, "eval_beir-webis-touche2020_recall@100": 0.39166, "eval_beir-webis-touche2020_recall@20": 0.17629, "eval_senteval-avg_sts": 0.7184140434793627, "eval_senteval-sickr_spearman": 0.6828378519897393, "eval_senteval-stsb_spearman": 0.753990234968986, "step": 20000, "test_accuracy": 90.6982421875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.55184406042099, "test_doc_norm": 1.3862961530685425, "test_inbatch_neg_score": 0.5824059247970581, "test_inbatch_pos_score": 1.3332602977752686, "test_loss": 0.55184406042099, "test_loss_align": 1.3353259563446045, "test_loss_unif": 3.8174610137939453, "test_loss_unif_q@queue": 3.8174610137939453, "test_norm_diff": 0.048930779099464417, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2881104052066803, "test_query_norm": 1.4352269172668457, "test_queue_k_norm": 1.4392898082733154, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0385286808013916, "test_stdq": 0.03856799751520157, "test_stdqueue_k": 0.045962415635585785, "test_stdqueue_q": 0.0 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.2124, "doc_norm": 1.43, "encoder_q-embeddings": 3057.8608, "encoder_q-layer.0": 2387.8828, "encoder_q-layer.1": 2663.4321, "encoder_q-layer.10": 406.4884, "encoder_q-layer.11": 1081.9065, "encoder_q-layer.2": 3146.4878, "encoder_q-layer.3": 2892.4473, "encoder_q-layer.4": 2462.7712, "encoder_q-layer.5": 2022.4896, "encoder_q-layer.6": 1540.283, "encoder_q-layer.7": 1213.0389, "encoder_q-layer.8": 692.3398, "encoder_q-layer.9": 387.2686, "epoch": 0.13, "inbatch_neg_score": 0.3401, "inbatch_pos_score": 0.8569, "learning_rate": 4.438888888888889e-05, "loss": 4.2124, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 66.8775, "num_token_overlap": 11.6776, "num_token_query": 31.391, "num_token_union": 65.2116, "num_word_context": 202.3224, "num_word_doc": 49.9179, "num_word_query": 23.3127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3174.6916, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3389, "query_norm": 1.4358, "queue_k_norm": 1.4334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.391, "sent_len_1": 66.8775, "sent_len_max_0": 127.5138, "sent_len_max_1": 190.9675, "stdk": 0.0457, "stdq": 0.0427, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 38.0859, "active_queue_size": 16384.0, "cl_loss": 4.2263, "doc_norm": 1.4265, "encoder_q-embeddings": 1608.3221, "encoder_q-layer.0": 1258.5999, "encoder_q-layer.1": 1303.3237, "encoder_q-layer.10": 369.3761, "encoder_q-layer.11": 1039.4695, "encoder_q-layer.2": 1519.1293, "encoder_q-layer.3": 1538.1069, "encoder_q-layer.4": 1585.2329, "encoder_q-layer.5": 1395.0514, "encoder_q-layer.6": 1279.9561, "encoder_q-layer.7": 1208.6881, "encoder_q-layer.8": 714.6293, "encoder_q-layer.9": 371.2969, "epoch": 0.13, "inbatch_neg_score": 0.3029, "inbatch_pos_score": 0.8008, "learning_rate": 4.433333333333334e-05, "loss": 4.2263, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.8884, "num_token_overlap": 11.7097, "num_token_query": 31.4625, "num_token_union": 65.1701, "num_word_context": 202.2911, "num_word_doc": 49.8961, "num_word_query": 23.3711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1860.9572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.3923, "queue_k_norm": 1.4294, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4625, "sent_len_1": 66.8884, "sent_len_max_0": 127.5312, "sent_len_max_1": 190.5475, "stdk": 0.0458, "stdq": 0.042, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.2193, "doc_norm": 1.4189, "encoder_q-embeddings": 9029.0654, "encoder_q-layer.0": 6557.9502, "encoder_q-layer.1": 7533.8599, "encoder_q-layer.10": 392.25, "encoder_q-layer.11": 976.5206, "encoder_q-layer.2": 10523.1475, "encoder_q-layer.3": 9464.3818, "encoder_q-layer.4": 6480.2803, "encoder_q-layer.5": 3169.1016, "encoder_q-layer.6": 2684.9519, "encoder_q-layer.7": 2597.6895, "encoder_q-layer.8": 1382.6014, "encoder_q-layer.9": 409.6587, "epoch": 0.13, "inbatch_neg_score": 0.3004, "inbatch_pos_score": 0.812, "learning_rate": 4.427777777777778e-05, "loss": 4.2193, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.8737, "num_token_overlap": 11.6822, "num_token_query": 31.3991, "num_token_union": 65.1726, "num_word_context": 202.3806, "num_word_doc": 49.9271, "num_word_query": 23.3238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9082.2716, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3005, "query_norm": 1.3909, "queue_k_norm": 1.4246, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3991, "sent_len_1": 66.8737, "sent_len_max_0": 127.5113, "sent_len_max_1": 188.7925, "stdk": 0.0457, "stdq": 0.0421, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.218, "doc_norm": 1.4176, "encoder_q-embeddings": 1138.7457, "encoder_q-layer.0": 814.82, "encoder_q-layer.1": 995.2242, "encoder_q-layer.10": 391.5648, "encoder_q-layer.11": 945.0941, "encoder_q-layer.2": 1072.6406, "encoder_q-layer.3": 1131.298, "encoder_q-layer.4": 1140.4612, "encoder_q-layer.5": 872.4962, "encoder_q-layer.6": 587.868, "encoder_q-layer.7": 472.064, "encoder_q-layer.8": 439.5111, "encoder_q-layer.9": 340.5058, "epoch": 0.13, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.8223, "learning_rate": 4.422222222222222e-05, "loss": 4.218, "norm_diff": 0.0148, "norm_loss": 0.0, "num_token_doc": 66.9073, "num_token_overlap": 11.6601, "num_token_query": 31.3413, "num_token_union": 65.2012, "num_word_context": 202.5714, "num_word_doc": 49.9051, "num_word_query": 23.2719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1279.2899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.4029, "queue_k_norm": 1.419, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3413, "sent_len_1": 66.9073, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.6575, "stdk": 0.0458, "stdq": 0.0431, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.218, "doc_norm": 1.411, "encoder_q-embeddings": 1365.1121, "encoder_q-layer.0": 1004.8645, "encoder_q-layer.1": 1031.5524, "encoder_q-layer.10": 357.2946, "encoder_q-layer.11": 1006.516, "encoder_q-layer.2": 1102.3781, "encoder_q-layer.3": 1122.3395, "encoder_q-layer.4": 1162.5492, "encoder_q-layer.5": 1041.67, "encoder_q-layer.6": 805.5715, "encoder_q-layer.7": 547.9622, "encoder_q-layer.8": 433.0905, "encoder_q-layer.9": 327.3418, "epoch": 0.13, "inbatch_neg_score": 0.2863, "inbatch_pos_score": 0.7988, "learning_rate": 4.4166666666666665e-05, "loss": 4.218, "norm_diff": 0.0337, "norm_loss": 0.0, "num_token_doc": 66.7436, "num_token_overlap": 11.6415, "num_token_query": 31.4729, "num_token_union": 65.2128, "num_word_context": 202.353, "num_word_doc": 49.7886, "num_word_query": 23.3911, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1444.4733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3774, "queue_k_norm": 1.415, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4729, "sent_len_1": 66.7436, "sent_len_max_0": 127.4125, "sent_len_max_1": 189.5075, "stdk": 0.0456, "stdq": 0.0418, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2255, "doc_norm": 1.4055, "encoder_q-embeddings": 732.5027, "encoder_q-layer.0": 519.2552, "encoder_q-layer.1": 598.8922, "encoder_q-layer.10": 372.1041, "encoder_q-layer.11": 999.6315, "encoder_q-layer.2": 660.0177, "encoder_q-layer.3": 693.3576, "encoder_q-layer.4": 621.1711, "encoder_q-layer.5": 615.6916, "encoder_q-layer.6": 603.4681, "encoder_q-layer.7": 567.541, "encoder_q-layer.8": 466.6246, "encoder_q-layer.9": 343.3926, "epoch": 0.13, "inbatch_neg_score": 0.2871, "inbatch_pos_score": 0.813, "learning_rate": 4.4111111111111114e-05, "loss": 4.2255, "norm_diff": 0.0083, "norm_loss": 0.0, "num_token_doc": 66.5695, "num_token_overlap": 11.6639, "num_token_query": 31.4321, "num_token_union": 65.0492, "num_word_context": 202.0762, "num_word_doc": 49.6779, "num_word_query": 23.342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 942.8473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.4047, "queue_k_norm": 1.4099, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4321, "sent_len_1": 66.5695, "sent_len_max_0": 127.6137, "sent_len_max_1": 189.3375, "stdk": 0.0456, "stdq": 0.0428, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.2199, "doc_norm": 1.4016, "encoder_q-embeddings": 1038.9332, "encoder_q-layer.0": 689.8857, "encoder_q-layer.1": 856.3444, "encoder_q-layer.10": 349.4471, "encoder_q-layer.11": 937.0739, "encoder_q-layer.2": 1031.2875, "encoder_q-layer.3": 1125.1465, "encoder_q-layer.4": 1205.7681, "encoder_q-layer.5": 1130.0165, "encoder_q-layer.6": 811.3398, "encoder_q-layer.7": 631.4794, "encoder_q-layer.8": 459.5591, "encoder_q-layer.9": 332.7386, "epoch": 0.13, "inbatch_neg_score": 0.2709, "inbatch_pos_score": 0.7676, "learning_rate": 4.4055555555555557e-05, "loss": 4.2199, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.4975, "num_token_overlap": 11.6303, "num_token_query": 31.2316, "num_token_union": 64.9063, "num_word_context": 201.9098, "num_word_doc": 49.6275, "num_word_query": 23.1705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1296.9981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.269, "query_norm": 1.3665, "queue_k_norm": 1.4076, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2316, "sent_len_1": 66.4975, "sent_len_max_0": 127.5, "sent_len_max_1": 186.905, "stdk": 0.0456, "stdq": 0.0419, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.2081, "doc_norm": 1.4014, "encoder_q-embeddings": 1817.0103, "encoder_q-layer.0": 1326.3292, "encoder_q-layer.1": 1311.1736, "encoder_q-layer.10": 371.119, "encoder_q-layer.11": 1037.2122, "encoder_q-layer.2": 1371.795, "encoder_q-layer.3": 1398.1521, "encoder_q-layer.4": 1318.0347, "encoder_q-layer.5": 1125.2209, "encoder_q-layer.6": 752.4825, "encoder_q-layer.7": 636.1531, "encoder_q-layer.8": 483.7, "encoder_q-layer.9": 340.2464, "epoch": 0.14, "inbatch_neg_score": 0.3014, "inbatch_pos_score": 0.8145, "learning_rate": 4.4000000000000006e-05, "loss": 4.2081, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.6181, "num_token_overlap": 11.6426, "num_token_query": 31.4273, "num_token_union": 65.1373, "num_word_context": 201.981, "num_word_doc": 49.6949, "num_word_query": 23.349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1747.8416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2981, "query_norm": 1.3937, "queue_k_norm": 1.4027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4273, "sent_len_1": 66.6181, "sent_len_max_0": 127.5625, "sent_len_max_1": 189.31, "stdk": 0.0457, "stdq": 0.0427, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.2284, "doc_norm": 1.3919, "encoder_q-embeddings": 1969.3446, "encoder_q-layer.0": 1502.8441, "encoder_q-layer.1": 1692.2756, "encoder_q-layer.10": 355.5166, "encoder_q-layer.11": 909.063, "encoder_q-layer.2": 2009.4774, "encoder_q-layer.3": 2049.1082, "encoder_q-layer.4": 1823.9081, "encoder_q-layer.5": 1595.8497, "encoder_q-layer.6": 1465.9681, "encoder_q-layer.7": 1055.0153, "encoder_q-layer.8": 517.526, "encoder_q-layer.9": 337.8863, "epoch": 0.14, "inbatch_neg_score": 0.2864, "inbatch_pos_score": 0.8013, "learning_rate": 4.394444444444445e-05, "loss": 4.2284, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.494, "num_token_overlap": 11.6636, "num_token_query": 31.48, "num_token_union": 65.0079, "num_word_context": 202.1118, "num_word_doc": 49.6404, "num_word_query": 23.3973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2200.2612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2864, "query_norm": 1.3842, "queue_k_norm": 1.4009, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.48, "sent_len_1": 66.494, "sent_len_max_0": 127.4275, "sent_len_max_1": 188.4425, "stdk": 0.0455, "stdq": 0.0418, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.2, "doc_norm": 1.4014, "encoder_q-embeddings": 4230.373, "encoder_q-layer.0": 3100.9905, "encoder_q-layer.1": 3699.2651, "encoder_q-layer.10": 369.1719, "encoder_q-layer.11": 998.4587, "encoder_q-layer.2": 4075.3174, "encoder_q-layer.3": 3224.0911, "encoder_q-layer.4": 2690.0581, "encoder_q-layer.5": 2612.2666, "encoder_q-layer.6": 2143.4885, "encoder_q-layer.7": 1606.5737, "encoder_q-layer.8": 1055.4517, "encoder_q-layer.9": 486.056, "epoch": 0.14, "inbatch_neg_score": 0.2972, "inbatch_pos_score": 0.8081, "learning_rate": 4.388888888888889e-05, "loss": 4.2, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.8382, "num_token_overlap": 11.6937, "num_token_query": 31.4636, "num_token_union": 65.2251, "num_word_context": 202.3002, "num_word_doc": 49.8756, "num_word_query": 23.3637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4093.88, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.4157, "queue_k_norm": 1.3971, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4636, "sent_len_1": 66.8382, "sent_len_max_0": 127.5625, "sent_len_max_1": 189.5087, "stdk": 0.0459, "stdq": 0.0431, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.1844, "doc_norm": 1.3982, "encoder_q-embeddings": 762.6342, "encoder_q-layer.0": 528.6391, "encoder_q-layer.1": 626.5704, "encoder_q-layer.10": 361.8279, "encoder_q-layer.11": 943.2108, "encoder_q-layer.2": 715.4824, "encoder_q-layer.3": 760.7084, "encoder_q-layer.4": 765.45, "encoder_q-layer.5": 773.749, "encoder_q-layer.6": 715.0011, "encoder_q-layer.7": 633.5707, "encoder_q-layer.8": 468.0242, "encoder_q-layer.9": 355.448, "epoch": 0.14, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 0.811, "learning_rate": 4.383333333333334e-05, "loss": 4.1844, "norm_diff": 0.0095, "norm_loss": 0.0, "num_token_doc": 66.7239, "num_token_overlap": 11.7222, "num_token_query": 31.5071, "num_token_union": 65.0936, "num_word_context": 202.5185, "num_word_doc": 49.7745, "num_word_query": 23.4048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1005.0309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4071, "queue_k_norm": 1.3967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5071, "sent_len_1": 66.7239, "sent_len_max_0": 127.4688, "sent_len_max_1": 189.2862, "stdk": 0.0459, "stdq": 0.0422, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.1845, "doc_norm": 1.3934, "encoder_q-embeddings": 1371.6252, "encoder_q-layer.0": 904.5032, "encoder_q-layer.1": 987.7534, "encoder_q-layer.10": 360.687, "encoder_q-layer.11": 987.1045, "encoder_q-layer.2": 1115.9331, "encoder_q-layer.3": 1144.7804, "encoder_q-layer.4": 1179.6737, "encoder_q-layer.5": 1084.2498, "encoder_q-layer.6": 1061.8448, "encoder_q-layer.7": 866.9741, "encoder_q-layer.8": 704.0223, "encoder_q-layer.9": 435.0916, "epoch": 0.14, "inbatch_neg_score": 0.3214, "inbatch_pos_score": 0.833, "learning_rate": 4.377777777777778e-05, "loss": 4.1845, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.7042, "num_token_overlap": 11.6639, "num_token_query": 31.3686, "num_token_union": 65.107, "num_word_context": 202.6537, "num_word_doc": 49.8198, "num_word_query": 23.3067, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1492.4854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3215, "query_norm": 1.4208, "queue_k_norm": 1.396, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3686, "sent_len_1": 66.7042, "sent_len_max_0": 127.4825, "sent_len_max_1": 186.8088, "stdk": 0.0457, "stdq": 0.0425, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.1848, "doc_norm": 1.3881, "encoder_q-embeddings": 18896.7969, "encoder_q-layer.0": 14014.6797, "encoder_q-layer.1": 13435.8467, "encoder_q-layer.10": 813.4692, "encoder_q-layer.11": 1937.54, "encoder_q-layer.2": 16854.1094, "encoder_q-layer.3": 16206.1299, "encoder_q-layer.4": 16661.6113, "encoder_q-layer.5": 14435.3135, "encoder_q-layer.6": 13114.5898, "encoder_q-layer.7": 10777.4238, "encoder_q-layer.8": 8067.2007, "encoder_q-layer.9": 3608.3269, "epoch": 0.14, "inbatch_neg_score": 0.3317, "inbatch_pos_score": 0.8467, "learning_rate": 4.3722222222222224e-05, "loss": 4.1848, "norm_diff": 0.0339, "norm_loss": 0.0, "num_token_doc": 66.8059, "num_token_overlap": 11.7005, "num_token_query": 31.468, "num_token_union": 65.188, "num_word_context": 202.4582, "num_word_doc": 49.8348, "num_word_query": 23.366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19556.1698, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3301, "query_norm": 1.422, "queue_k_norm": 1.3948, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.468, "sent_len_1": 66.8059, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.0225, "stdk": 0.0455, "stdq": 0.0424, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.1752, "doc_norm": 1.3946, "encoder_q-embeddings": 7106.856, "encoder_q-layer.0": 5748.4131, "encoder_q-layer.1": 6227.0659, "encoder_q-layer.10": 689.2104, "encoder_q-layer.11": 1952.7714, "encoder_q-layer.2": 7495.7085, "encoder_q-layer.3": 8227.1396, "encoder_q-layer.4": 8085.646, "encoder_q-layer.5": 5595.9521, "encoder_q-layer.6": 3997.2981, "encoder_q-layer.7": 2600.9963, "encoder_q-layer.8": 1096.4603, "encoder_q-layer.9": 667.203, "epoch": 0.14, "inbatch_neg_score": 0.343, "inbatch_pos_score": 0.8628, "learning_rate": 4.3666666666666666e-05, "loss": 4.1752, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 66.6226, "num_token_overlap": 11.6535, "num_token_query": 31.3906, "num_token_union": 65.0856, "num_word_context": 202.6397, "num_word_doc": 49.7518, "num_word_query": 23.322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8065.5998, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3428, "query_norm": 1.4343, "queue_k_norm": 1.3953, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3906, "sent_len_1": 66.6226, "sent_len_max_0": 127.505, "sent_len_max_1": 187.0938, "stdk": 0.0457, "stdq": 0.0421, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1801, "doc_norm": 1.3982, "encoder_q-embeddings": 1619.2039, "encoder_q-layer.0": 1074.2633, "encoder_q-layer.1": 1154.177, "encoder_q-layer.10": 748.4518, "encoder_q-layer.11": 1851.5122, "encoder_q-layer.2": 1362.3896, "encoder_q-layer.3": 1431.4532, "encoder_q-layer.4": 1400.3336, "encoder_q-layer.5": 1350.095, "encoder_q-layer.6": 1241.048, "encoder_q-layer.7": 1111.2329, "encoder_q-layer.8": 875.9052, "encoder_q-layer.9": 680.7538, "epoch": 0.14, "inbatch_neg_score": 0.3549, "inbatch_pos_score": 0.876, "learning_rate": 4.3611111111111116e-05, "loss": 4.1801, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.7385, "num_token_overlap": 11.6459, "num_token_query": 31.3367, "num_token_union": 65.1051, "num_word_context": 202.3033, "num_word_doc": 49.8214, "num_word_query": 23.2866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1926.8842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3528, "query_norm": 1.4522, "queue_k_norm": 1.3951, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3367, "sent_len_1": 66.7385, "sent_len_max_0": 127.4963, "sent_len_max_1": 186.92, "stdk": 0.0458, "stdq": 0.0426, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.1945, "doc_norm": 1.3946, "encoder_q-embeddings": 2768.001, "encoder_q-layer.0": 2197.4617, "encoder_q-layer.1": 2253.2119, "encoder_q-layer.10": 729.2599, "encoder_q-layer.11": 1839.8798, "encoder_q-layer.2": 2719.6843, "encoder_q-layer.3": 2575.7417, "encoder_q-layer.4": 2098.8135, "encoder_q-layer.5": 1388.7985, "encoder_q-layer.6": 1095.2332, "encoder_q-layer.7": 793.1817, "encoder_q-layer.8": 655.3434, "encoder_q-layer.9": 604.9939, "epoch": 0.14, "inbatch_neg_score": 0.3654, "inbatch_pos_score": 0.8789, "learning_rate": 4.355555555555556e-05, "loss": 4.1945, "norm_diff": 0.0525, "norm_loss": 0.0, "num_token_doc": 66.86, "num_token_overlap": 11.6467, "num_token_query": 31.3977, "num_token_union": 65.222, "num_word_context": 202.8086, "num_word_doc": 49.9013, "num_word_query": 23.3204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2863.0923, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3628, "query_norm": 1.4471, "queue_k_norm": 1.3972, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3977, "sent_len_1": 66.86, "sent_len_max_0": 127.5175, "sent_len_max_1": 190.7363, "stdk": 0.0456, "stdq": 0.0426, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.1882, "doc_norm": 1.3957, "encoder_q-embeddings": 3145.4473, "encoder_q-layer.0": 2095.334, "encoder_q-layer.1": 2398.7373, "encoder_q-layer.10": 809.2777, "encoder_q-layer.11": 2110.7051, "encoder_q-layer.2": 2699.0742, "encoder_q-layer.3": 3101.1357, "encoder_q-layer.4": 3085.865, "encoder_q-layer.5": 3214.8145, "encoder_q-layer.6": 2777.8445, "encoder_q-layer.7": 2617.5735, "encoder_q-layer.8": 2174.0273, "encoder_q-layer.9": 916.6901, "epoch": 0.14, "inbatch_neg_score": 0.3629, "inbatch_pos_score": 0.874, "learning_rate": 4.35e-05, "loss": 4.1882, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.9622, "num_token_overlap": 11.6585, "num_token_query": 31.3583, "num_token_union": 65.2195, "num_word_context": 202.0443, "num_word_doc": 49.9273, "num_word_query": 23.2817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3764.7413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3618, "query_norm": 1.4409, "queue_k_norm": 1.3971, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3583, "sent_len_1": 66.9622, "sent_len_max_0": 127.3588, "sent_len_max_1": 191.99, "stdk": 0.0457, "stdq": 0.0425, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.1977, "doc_norm": 1.4007, "encoder_q-embeddings": 23828.5039, "encoder_q-layer.0": 18467.2461, "encoder_q-layer.1": 16964.9551, "encoder_q-layer.10": 422.2865, "encoder_q-layer.11": 1062.7048, "encoder_q-layer.2": 16801.8125, "encoder_q-layer.3": 15059.5586, "encoder_q-layer.4": 14158.5117, "encoder_q-layer.5": 12097.5859, "encoder_q-layer.6": 8891.7334, "encoder_q-layer.7": 5201.8169, "encoder_q-layer.8": 2635.0295, "encoder_q-layer.9": 689.5169, "epoch": 0.14, "inbatch_neg_score": 0.3692, "inbatch_pos_score": 0.9131, "learning_rate": 4.344444444444445e-05, "loss": 4.1977, "norm_diff": 0.0602, "norm_loss": 0.0, "num_token_doc": 66.7896, "num_token_overlap": 11.6185, "num_token_query": 31.3586, "num_token_union": 65.1892, "num_word_context": 202.3469, "num_word_doc": 49.8522, "num_word_query": 23.2754, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21014.1237, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3679, "query_norm": 1.4609, "queue_k_norm": 1.4001, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3586, "sent_len_1": 66.7896, "sent_len_max_0": 127.31, "sent_len_max_1": 188.705, "stdk": 0.0458, "stdq": 0.0438, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.1903, "doc_norm": 1.3996, "encoder_q-embeddings": 1243.3708, "encoder_q-layer.0": 818.0585, "encoder_q-layer.1": 851.7847, "encoder_q-layer.10": 348.0201, "encoder_q-layer.11": 871.7094, "encoder_q-layer.2": 985.925, "encoder_q-layer.3": 1110.2104, "encoder_q-layer.4": 1143.4293, "encoder_q-layer.5": 1179.6493, "encoder_q-layer.6": 903.9731, "encoder_q-layer.7": 864.3133, "encoder_q-layer.8": 559.6277, "encoder_q-layer.9": 347.2394, "epoch": 0.14, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 0.8687, "learning_rate": 4.338888888888889e-05, "loss": 4.1903, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 67.0086, "num_token_overlap": 11.6594, "num_token_query": 31.322, "num_token_union": 65.2946, "num_word_context": 202.0504, "num_word_doc": 49.9928, "num_word_query": 23.2469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1370.8991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4272, "queue_k_norm": 1.401, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.322, "sent_len_1": 67.0086, "sent_len_max_0": 127.4375, "sent_len_max_1": 190.375, "stdk": 0.0457, "stdq": 0.0422, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1903, "doc_norm": 1.3967, "encoder_q-embeddings": 2339.6619, "encoder_q-layer.0": 1790.9739, "encoder_q-layer.1": 2067.4722, "encoder_q-layer.10": 368.5312, "encoder_q-layer.11": 974.0884, "encoder_q-layer.2": 2098.0376, "encoder_q-layer.3": 2014.6737, "encoder_q-layer.4": 1949.543, "encoder_q-layer.5": 1485.2217, "encoder_q-layer.6": 1172.5436, "encoder_q-layer.7": 778.4794, "encoder_q-layer.8": 536.5082, "encoder_q-layer.9": 360.4597, "epoch": 0.14, "inbatch_neg_score": 0.3768, "inbatch_pos_score": 0.8853, "learning_rate": 4.3333333333333334e-05, "loss": 4.1903, "norm_diff": 0.0238, "norm_loss": 0.0, "num_token_doc": 66.8122, "num_token_overlap": 11.6748, "num_token_query": 31.316, "num_token_union": 65.0852, "num_word_context": 202.1439, "num_word_doc": 49.8442, "num_word_query": 23.2576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2370.6304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.375, "query_norm": 1.4205, "queue_k_norm": 1.4019, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.316, "sent_len_1": 66.8122, "sent_len_max_0": 127.4137, "sent_len_max_1": 190.32, "stdk": 0.0455, "stdq": 0.0423, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2334, "doc_norm": 1.405, "encoder_q-embeddings": 23068.5625, "encoder_q-layer.0": 17099.8848, "encoder_q-layer.1": 21100.9668, "encoder_q-layer.10": 379.6085, "encoder_q-layer.11": 948.1993, "encoder_q-layer.2": 20359.7832, "encoder_q-layer.3": 21416.6504, "encoder_q-layer.4": 21555.5449, "encoder_q-layer.5": 20177.3438, "encoder_q-layer.6": 17544.666, "encoder_q-layer.7": 13466.6377, "encoder_q-layer.8": 5875.0459, "encoder_q-layer.9": 959.7845, "epoch": 0.14, "inbatch_neg_score": 0.3697, "inbatch_pos_score": 0.9087, "learning_rate": 4.3277777777777776e-05, "loss": 4.2334, "norm_diff": 0.0708, "norm_loss": 0.0, "num_token_doc": 66.6883, "num_token_overlap": 11.6249, "num_token_query": 31.3317, "num_token_union": 65.0311, "num_word_context": 202.1998, "num_word_doc": 49.7235, "num_word_query": 23.2823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25229.5814, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3694, "query_norm": 1.4758, "queue_k_norm": 1.4013, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3317, "sent_len_1": 66.6883, "sent_len_max_0": 127.4363, "sent_len_max_1": 191.27, "stdk": 0.0458, "stdq": 0.0434, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.1951, "doc_norm": 1.398, "encoder_q-embeddings": 3378.5532, "encoder_q-layer.0": 2383.7751, "encoder_q-layer.1": 2414.8428, "encoder_q-layer.10": 358.65, "encoder_q-layer.11": 972.6286, "encoder_q-layer.2": 2560.781, "encoder_q-layer.3": 2883.4763, "encoder_q-layer.4": 2846.3152, "encoder_q-layer.5": 2655.3481, "encoder_q-layer.6": 2574.6804, "encoder_q-layer.7": 1948.2473, "encoder_q-layer.8": 901.704, "encoder_q-layer.9": 373.956, "epoch": 0.14, "inbatch_neg_score": 0.3674, "inbatch_pos_score": 0.874, "learning_rate": 4.3222222222222226e-05, "loss": 4.1951, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.8943, "num_token_overlap": 11.6547, "num_token_query": 31.3554, "num_token_union": 65.1538, "num_word_context": 202.3121, "num_word_doc": 49.9387, "num_word_query": 23.29, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3395.6914, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.364, "query_norm": 1.4319, "queue_k_norm": 1.4057, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3554, "sent_len_1": 66.8943, "sent_len_max_0": 127.4775, "sent_len_max_1": 190.1763, "stdk": 0.0455, "stdq": 0.0419, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.2143, "doc_norm": 1.4071, "encoder_q-embeddings": 2064.1482, "encoder_q-layer.0": 1559.5286, "encoder_q-layer.1": 1645.2915, "encoder_q-layer.10": 370.9857, "encoder_q-layer.11": 1031.8657, "encoder_q-layer.2": 1844.4982, "encoder_q-layer.3": 1810.1411, "encoder_q-layer.4": 1712.988, "encoder_q-layer.5": 1725.9395, "encoder_q-layer.6": 1674.2642, "encoder_q-layer.7": 1693.5424, "encoder_q-layer.8": 976.1812, "encoder_q-layer.9": 488.5285, "epoch": 0.15, "inbatch_neg_score": 0.3679, "inbatch_pos_score": 0.8667, "learning_rate": 4.316666666666667e-05, "loss": 4.2143, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 67.0379, "num_token_overlap": 11.6909, "num_token_query": 31.3782, "num_token_union": 65.2528, "num_word_context": 202.8345, "num_word_doc": 50.036, "num_word_query": 23.3096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2286.3877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3662, "query_norm": 1.4632, "queue_k_norm": 1.4065, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3782, "sent_len_1": 67.0379, "sent_len_max_0": 127.58, "sent_len_max_1": 190.1475, "stdk": 0.0459, "stdq": 0.0433, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.2427, "doc_norm": 1.4112, "encoder_q-embeddings": 1431.2811, "encoder_q-layer.0": 955.8881, "encoder_q-layer.1": 1142.6648, "encoder_q-layer.10": 378.1346, "encoder_q-layer.11": 921.2073, "encoder_q-layer.2": 1314.6431, "encoder_q-layer.3": 1423.2916, "encoder_q-layer.4": 1310.9622, "encoder_q-layer.5": 1251.9214, "encoder_q-layer.6": 1316.4775, "encoder_q-layer.7": 1063.2827, "encoder_q-layer.8": 733.8118, "encoder_q-layer.9": 439.5178, "epoch": 0.15, "inbatch_neg_score": 0.3518, "inbatch_pos_score": 0.876, "learning_rate": 4.311111111111111e-05, "loss": 4.2427, "norm_diff": 0.0658, "norm_loss": 0.0, "num_token_doc": 66.9149, "num_token_overlap": 11.7005, "num_token_query": 31.4359, "num_token_union": 65.2447, "num_word_context": 202.387, "num_word_doc": 49.9308, "num_word_query": 23.3304, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1654.2821, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3506, "query_norm": 1.4769, "queue_k_norm": 1.407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4359, "sent_len_1": 66.9149, "sent_len_max_0": 127.3988, "sent_len_max_1": 189.3375, "stdk": 0.046, "stdq": 0.0437, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.2281, "doc_norm": 1.4054, "encoder_q-embeddings": 1851.4883, "encoder_q-layer.0": 1274.5168, "encoder_q-layer.1": 1431.5857, "encoder_q-layer.10": 179.9856, "encoder_q-layer.11": 460.1619, "encoder_q-layer.2": 1619.7384, "encoder_q-layer.3": 1899.7957, "encoder_q-layer.4": 1931.515, "encoder_q-layer.5": 1918.5374, "encoder_q-layer.6": 1590.5459, "encoder_q-layer.7": 1428.4683, "encoder_q-layer.8": 601.3956, "encoder_q-layer.9": 236.539, "epoch": 0.15, "inbatch_neg_score": 0.3396, "inbatch_pos_score": 0.8745, "learning_rate": 4.305555555555556e-05, "loss": 4.2281, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.8252, "num_token_overlap": 11.6921, "num_token_query": 31.4061, "num_token_union": 65.1603, "num_word_context": 202.2118, "num_word_doc": 49.885, "num_word_query": 23.316, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2115.5196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3384, "query_norm": 1.4429, "queue_k_norm": 1.4069, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4061, "sent_len_1": 66.8252, "sent_len_max_0": 127.6112, "sent_len_max_1": 187.7138, "stdk": 0.0458, "stdq": 0.0429, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.24, "doc_norm": 1.4082, "encoder_q-embeddings": 812.2401, "encoder_q-layer.0": 613.7451, "encoder_q-layer.1": 617.9669, "encoder_q-layer.10": 201.1351, "encoder_q-layer.11": 517.0518, "encoder_q-layer.2": 555.367, "encoder_q-layer.3": 548.5582, "encoder_q-layer.4": 560.3239, "encoder_q-layer.5": 491.4307, "encoder_q-layer.6": 500.4399, "encoder_q-layer.7": 578.8762, "encoder_q-layer.8": 478.3447, "encoder_q-layer.9": 260.5662, "epoch": 0.15, "inbatch_neg_score": 0.3557, "inbatch_pos_score": 0.8647, "learning_rate": 4.3e-05, "loss": 4.24, "norm_diff": 0.011, "norm_loss": 0.0, "num_token_doc": 66.8454, "num_token_overlap": 11.6627, "num_token_query": 31.452, "num_token_union": 65.2272, "num_word_context": 202.2998, "num_word_doc": 49.8589, "num_word_query": 23.3771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 823.6359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3562, "query_norm": 1.4083, "queue_k_norm": 1.4084, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.452, "sent_len_1": 66.8454, "sent_len_max_0": 127.6025, "sent_len_max_1": 187.6738, "stdk": 0.0459, "stdq": 0.0421, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.234, "doc_norm": 1.4033, "encoder_q-embeddings": 7903.6851, "encoder_q-layer.0": 6195.1191, "encoder_q-layer.1": 6352.1948, "encoder_q-layer.10": 193.9021, "encoder_q-layer.11": 468.7889, "encoder_q-layer.2": 8649.3105, "encoder_q-layer.3": 9980.0029, "encoder_q-layer.4": 8758.4404, "encoder_q-layer.5": 4356.2485, "encoder_q-layer.6": 2918.9001, "encoder_q-layer.7": 2164.0398, "encoder_q-layer.8": 951.3954, "encoder_q-layer.9": 536.025, "epoch": 0.15, "inbatch_neg_score": 0.3562, "inbatch_pos_score": 0.8555, "learning_rate": 4.294444444444445e-05, "loss": 4.234, "norm_diff": 0.0224, "norm_loss": 0.0, "num_token_doc": 66.6024, "num_token_overlap": 11.6414, "num_token_query": 31.3275, "num_token_union": 65.0351, "num_word_context": 202.0603, "num_word_doc": 49.6889, "num_word_query": 23.2753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9000.1032, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3533, "query_norm": 1.4193, "queue_k_norm": 1.4092, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3275, "sent_len_1": 66.6024, "sent_len_max_0": 127.4537, "sent_len_max_1": 189.6037, "stdk": 0.0457, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.2136, "doc_norm": 1.4083, "encoder_q-embeddings": 917.5504, "encoder_q-layer.0": 625.3934, "encoder_q-layer.1": 719.2228, "encoder_q-layer.10": 196.3037, "encoder_q-layer.11": 508.2208, "encoder_q-layer.2": 763.4182, "encoder_q-layer.3": 784.2974, "encoder_q-layer.4": 807.1794, "encoder_q-layer.5": 643.9612, "encoder_q-layer.6": 487.524, "encoder_q-layer.7": 372.621, "encoder_q-layer.8": 238.5081, "encoder_q-layer.9": 178.2471, "epoch": 0.15, "inbatch_neg_score": 0.354, "inbatch_pos_score": 0.8613, "learning_rate": 4.2888888888888886e-05, "loss": 4.2136, "norm_diff": 0.0184, "norm_loss": 0.0, "num_token_doc": 66.7943, "num_token_overlap": 11.6662, "num_token_query": 31.3053, "num_token_union": 65.1048, "num_word_context": 202.5271, "num_word_doc": 49.8758, "num_word_query": 23.2424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 923.8352, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3545, "query_norm": 1.4267, "queue_k_norm": 1.4102, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3053, "sent_len_1": 66.7943, "sent_len_max_0": 127.2725, "sent_len_max_1": 187.81, "stdk": 0.046, "stdq": 0.0427, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.2321, "doc_norm": 1.4148, "encoder_q-embeddings": 1103.6012, "encoder_q-layer.0": 820.4691, "encoder_q-layer.1": 889.8951, "encoder_q-layer.10": 213.6357, "encoder_q-layer.11": 496.2619, "encoder_q-layer.2": 1040.5386, "encoder_q-layer.3": 1123.6212, "encoder_q-layer.4": 1064.3612, "encoder_q-layer.5": 836.9691, "encoder_q-layer.6": 870.0422, "encoder_q-layer.7": 743.1516, "encoder_q-layer.8": 519.0166, "encoder_q-layer.9": 227.152, "epoch": 0.15, "inbatch_neg_score": 0.3579, "inbatch_pos_score": 0.8755, "learning_rate": 4.2833333333333335e-05, "loss": 4.2321, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.8309, "num_token_overlap": 11.6926, "num_token_query": 31.3847, "num_token_union": 65.0908, "num_word_context": 202.1319, "num_word_doc": 49.8519, "num_word_query": 23.3057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1231.2844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3567, "query_norm": 1.4313, "queue_k_norm": 1.4122, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3847, "sent_len_1": 66.8309, "sent_len_max_0": 127.5425, "sent_len_max_1": 191.6125, "stdk": 0.0462, "stdq": 0.0424, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.2083, "doc_norm": 1.4072, "encoder_q-embeddings": 617.1016, "encoder_q-layer.0": 430.5724, "encoder_q-layer.1": 443.913, "encoder_q-layer.10": 185.1546, "encoder_q-layer.11": 498.535, "encoder_q-layer.2": 459.8797, "encoder_q-layer.3": 425.9521, "encoder_q-layer.4": 405.8913, "encoder_q-layer.5": 272.1431, "encoder_q-layer.6": 292.0879, "encoder_q-layer.7": 261.9841, "encoder_q-layer.8": 232.6608, "encoder_q-layer.9": 183.2647, "epoch": 0.15, "inbatch_neg_score": 0.3488, "inbatch_pos_score": 0.8765, "learning_rate": 4.277777777777778e-05, "loss": 4.2083, "norm_diff": 0.0458, "norm_loss": 0.0, "num_token_doc": 66.5651, "num_token_overlap": 11.6413, "num_token_query": 31.3076, "num_token_union": 64.9638, "num_word_context": 201.8521, "num_word_doc": 49.6858, "num_word_query": 23.2571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 611.9887, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3489, "query_norm": 1.453, "queue_k_norm": 1.412, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3076, "sent_len_1": 66.5651, "sent_len_max_0": 127.4038, "sent_len_max_1": 188.3875, "stdk": 0.046, "stdq": 0.043, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2424, "doc_norm": 1.412, "encoder_q-embeddings": 828.1259, "encoder_q-layer.0": 595.8784, "encoder_q-layer.1": 687.9651, "encoder_q-layer.10": 175.7074, "encoder_q-layer.11": 474.0934, "encoder_q-layer.2": 788.1247, "encoder_q-layer.3": 865.2121, "encoder_q-layer.4": 803.4326, "encoder_q-layer.5": 641.8749, "encoder_q-layer.6": 630.5283, "encoder_q-layer.7": 472.2698, "encoder_q-layer.8": 285.5755, "encoder_q-layer.9": 195.1448, "epoch": 0.15, "inbatch_neg_score": 0.358, "inbatch_pos_score": 0.8848, "learning_rate": 4.272222222222223e-05, "loss": 4.2424, "norm_diff": 0.0398, "norm_loss": 0.0, "num_token_doc": 66.9781, "num_token_overlap": 11.6701, "num_token_query": 31.37, "num_token_union": 65.2136, "num_word_context": 202.4521, "num_word_doc": 49.9904, "num_word_query": 23.305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 919.8331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3574, "query_norm": 1.4518, "queue_k_norm": 1.4122, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.37, "sent_len_1": 66.9781, "sent_len_max_0": 127.67, "sent_len_max_1": 188.2388, "stdk": 0.0462, "stdq": 0.043, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.2135, "doc_norm": 1.4176, "encoder_q-embeddings": 1805.3871, "encoder_q-layer.0": 1361.8771, "encoder_q-layer.1": 1390.9294, "encoder_q-layer.10": 167.6052, "encoder_q-layer.11": 457.3246, "encoder_q-layer.2": 1203.7834, "encoder_q-layer.3": 1154.5959, "encoder_q-layer.4": 1127.944, "encoder_q-layer.5": 913.4958, "encoder_q-layer.6": 1089.4938, "encoder_q-layer.7": 786.8265, "encoder_q-layer.8": 439.6427, "encoder_q-layer.9": 209.0801, "epoch": 0.15, "inbatch_neg_score": 0.3351, "inbatch_pos_score": 0.8369, "learning_rate": 4.266666666666667e-05, "loss": 4.2135, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 66.8538, "num_token_overlap": 11.7113, "num_token_query": 31.422, "num_token_union": 65.2114, "num_word_context": 202.3722, "num_word_doc": 49.9105, "num_word_query": 23.3404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1652.5809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3325, "query_norm": 1.3737, "queue_k_norm": 1.4131, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.422, "sent_len_1": 66.8538, "sent_len_max_0": 127.5212, "sent_len_max_1": 189.2788, "stdk": 0.0463, "stdq": 0.0409, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2164, "doc_norm": 1.4102, "encoder_q-embeddings": 418.2192, "encoder_q-layer.0": 308.0792, "encoder_q-layer.1": 313.2693, "encoder_q-layer.10": 173.6441, "encoder_q-layer.11": 442.6754, "encoder_q-layer.2": 340.1524, "encoder_q-layer.3": 327.7224, "encoder_q-layer.4": 323.4497, "encoder_q-layer.5": 280.1534, "encoder_q-layer.6": 321.5224, "encoder_q-layer.7": 311.8639, "encoder_q-layer.8": 240.841, "encoder_q-layer.9": 170.3344, "epoch": 0.15, "inbatch_neg_score": 0.3229, "inbatch_pos_score": 0.8467, "learning_rate": 4.261111111111111e-05, "loss": 4.2164, "norm_diff": 0.0122, "norm_loss": 0.0, "num_token_doc": 66.7559, "num_token_overlap": 11.6625, "num_token_query": 31.3852, "num_token_union": 65.1223, "num_word_context": 202.3722, "num_word_doc": 49.8487, "num_word_query": 23.3201, "postclip_grad_norm": 1.0, "preclip_grad_norm": 484.7428, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3225, "query_norm": 1.4043, "queue_k_norm": 1.4091, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3852, "sent_len_1": 66.7559, "sent_len_max_0": 127.5175, "sent_len_max_1": 186.9625, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.2146, "doc_norm": 1.4105, "encoder_q-embeddings": 2390.814, "encoder_q-layer.0": 1907.7988, "encoder_q-layer.1": 2656.5437, "encoder_q-layer.10": 171.7895, "encoder_q-layer.11": 443.3076, "encoder_q-layer.2": 1786.2089, "encoder_q-layer.3": 1607.9891, "encoder_q-layer.4": 1596.8198, "encoder_q-layer.5": 1088.793, "encoder_q-layer.6": 561.9465, "encoder_q-layer.7": 409.5623, "encoder_q-layer.8": 322.8333, "encoder_q-layer.9": 197.4434, "epoch": 0.15, "inbatch_neg_score": 0.3025, "inbatch_pos_score": 0.8232, "learning_rate": 4.255555555555556e-05, "loss": 4.2146, "norm_diff": 0.018, "norm_loss": 0.0, "num_token_doc": 66.6408, "num_token_overlap": 11.6624, "num_token_query": 31.2644, "num_token_union": 64.9656, "num_word_context": 201.9817, "num_word_doc": 49.7023, "num_word_query": 23.2302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2270.591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3015, "query_norm": 1.3945, "queue_k_norm": 1.4114, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2644, "sent_len_1": 66.6408, "sent_len_max_0": 127.6238, "sent_len_max_1": 189.915, "stdk": 0.0462, "stdq": 0.0423, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.2229, "doc_norm": 1.4043, "encoder_q-embeddings": 2262.4668, "encoder_q-layer.0": 1686.6165, "encoder_q-layer.1": 1705.4559, "encoder_q-layer.10": 191.5372, "encoder_q-layer.11": 449.7281, "encoder_q-layer.2": 1971.9071, "encoder_q-layer.3": 2095.1414, "encoder_q-layer.4": 2120.3152, "encoder_q-layer.5": 2091.1731, "encoder_q-layer.6": 2258.7468, "encoder_q-layer.7": 1504.5515, "encoder_q-layer.8": 829.5911, "encoder_q-layer.9": 429.3896, "epoch": 0.15, "inbatch_neg_score": 0.2785, "inbatch_pos_score": 0.7896, "learning_rate": 4.25e-05, "loss": 4.2229, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.8299, "num_token_overlap": 11.6655, "num_token_query": 31.3808, "num_token_union": 65.1569, "num_word_context": 202.4069, "num_word_doc": 49.86, "num_word_query": 23.3077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2533.733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2771, "query_norm": 1.3751, "queue_k_norm": 1.4105, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3808, "sent_len_1": 66.8299, "sent_len_max_0": 127.245, "sent_len_max_1": 191.615, "stdk": 0.0461, "stdq": 0.0418, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.2033, "doc_norm": 1.404, "encoder_q-embeddings": 1124.9176, "encoder_q-layer.0": 777.9661, "encoder_q-layer.1": 874.7233, "encoder_q-layer.10": 196.5435, "encoder_q-layer.11": 479.7431, "encoder_q-layer.2": 990.0276, "encoder_q-layer.3": 1093.1285, "encoder_q-layer.4": 1151.7048, "encoder_q-layer.5": 1021.3215, "encoder_q-layer.6": 922.2772, "encoder_q-layer.7": 545.3718, "encoder_q-layer.8": 305.3636, "encoder_q-layer.9": 181.5211, "epoch": 0.15, "inbatch_neg_score": 0.2705, "inbatch_pos_score": 0.7852, "learning_rate": 4.2444444444444445e-05, "loss": 4.2033, "norm_diff": 0.0138, "norm_loss": 0.0, "num_token_doc": 66.6199, "num_token_overlap": 11.7091, "num_token_query": 31.4952, "num_token_union": 65.0765, "num_word_context": 202.4352, "num_word_doc": 49.697, "num_word_query": 23.4083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1221.9847, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.269, "query_norm": 1.41, "queue_k_norm": 1.4064, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4952, "sent_len_1": 66.6199, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.0387, "stdk": 0.0461, "stdq": 0.0433, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.2073, "doc_norm": 1.405, "encoder_q-embeddings": 375.9568, "encoder_q-layer.0": 273.6297, "encoder_q-layer.1": 277.8601, "encoder_q-layer.10": 192.8575, "encoder_q-layer.11": 480.2999, "encoder_q-layer.2": 311.006, "encoder_q-layer.3": 275.2122, "encoder_q-layer.4": 268.9673, "encoder_q-layer.5": 191.7004, "encoder_q-layer.6": 194.4348, "encoder_q-layer.7": 192.3051, "encoder_q-layer.8": 215.3337, "encoder_q-layer.9": 178.9894, "epoch": 0.15, "inbatch_neg_score": 0.2902, "inbatch_pos_score": 0.8164, "learning_rate": 4.238888888888889e-05, "loss": 4.2073, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.8163, "num_token_overlap": 11.6488, "num_token_query": 31.3039, "num_token_union": 65.1247, "num_word_context": 202.253, "num_word_doc": 49.8303, "num_word_query": 23.2437, "postclip_grad_norm": 1.0, "preclip_grad_norm": 425.2493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4109, "queue_k_norm": 1.4059, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3039, "sent_len_1": 66.8163, "sent_len_max_0": 127.3662, "sent_len_max_1": 190.5662, "stdk": 0.0462, "stdq": 0.0433, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.2204, "doc_norm": 1.4038, "encoder_q-embeddings": 597.8483, "encoder_q-layer.0": 442.0331, "encoder_q-layer.1": 477.8293, "encoder_q-layer.10": 200.1242, "encoder_q-layer.11": 508.0533, "encoder_q-layer.2": 500.8749, "encoder_q-layer.3": 472.1872, "encoder_q-layer.4": 377.452, "encoder_q-layer.5": 295.3957, "encoder_q-layer.6": 289.9319, "encoder_q-layer.7": 259.274, "encoder_q-layer.8": 215.0475, "encoder_q-layer.9": 173.3872, "epoch": 0.15, "inbatch_neg_score": 0.2812, "inbatch_pos_score": 0.7822, "learning_rate": 4.233333333333334e-05, "loss": 4.2204, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.9875, "num_token_overlap": 11.699, "num_token_query": 31.3751, "num_token_union": 65.1978, "num_word_context": 202.5907, "num_word_doc": 49.9608, "num_word_query": 23.2823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 613.8292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2798, "query_norm": 1.3481, "queue_k_norm": 1.4017, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3751, "sent_len_1": 66.9875, "sent_len_max_0": 127.4325, "sent_len_max_1": 190.0475, "stdk": 0.0462, "stdq": 0.0408, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.2191, "doc_norm": 1.3951, "encoder_q-embeddings": 1185.0864, "encoder_q-layer.0": 904.1301, "encoder_q-layer.1": 953.9145, "encoder_q-layer.10": 185.6537, "encoder_q-layer.11": 503.3963, "encoder_q-layer.2": 1077.9329, "encoder_q-layer.3": 1225.493, "encoder_q-layer.4": 1122.6742, "encoder_q-layer.5": 956.1677, "encoder_q-layer.6": 984.9972, "encoder_q-layer.7": 908.4866, "encoder_q-layer.8": 527.8185, "encoder_q-layer.9": 214.4165, "epoch": 0.16, "inbatch_neg_score": 0.2952, "inbatch_pos_score": 0.8271, "learning_rate": 4.227777777777778e-05, "loss": 4.2191, "norm_diff": 0.0197, "norm_loss": 0.0, "num_token_doc": 66.5634, "num_token_overlap": 11.6697, "num_token_query": 31.3144, "num_token_union": 64.9544, "num_word_context": 201.9923, "num_word_doc": 49.6702, "num_word_query": 23.2602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1348.5985, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3754, "queue_k_norm": 1.4001, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3144, "sent_len_1": 66.5634, "sent_len_max_0": 127.6388, "sent_len_max_1": 188.3363, "stdk": 0.0459, "stdq": 0.042, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1907, "doc_norm": 1.3985, "encoder_q-embeddings": 433.3125, "encoder_q-layer.0": 302.6363, "encoder_q-layer.1": 342.1629, "encoder_q-layer.10": 225.6754, "encoder_q-layer.11": 561.581, "encoder_q-layer.2": 318.399, "encoder_q-layer.3": 278.0886, "encoder_q-layer.4": 262.328, "encoder_q-layer.5": 243.4383, "encoder_q-layer.6": 242.2181, "encoder_q-layer.7": 267.8647, "encoder_q-layer.8": 256.6597, "encoder_q-layer.9": 213.1892, "epoch": 0.16, "inbatch_neg_score": 0.2963, "inbatch_pos_score": 0.8169, "learning_rate": 4.222222222222222e-05, "loss": 4.1907, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.8893, "num_token_overlap": 11.7393, "num_token_query": 31.4919, "num_token_union": 65.1823, "num_word_context": 202.2721, "num_word_doc": 49.9097, "num_word_query": 23.4012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 491.6371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2966, "query_norm": 1.4213, "queue_k_norm": 1.3982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4919, "sent_len_1": 66.8893, "sent_len_max_0": 127.4525, "sent_len_max_1": 189.1275, "stdk": 0.0461, "stdq": 0.0437, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.1947, "doc_norm": 1.3953, "encoder_q-embeddings": 1691.8213, "encoder_q-layer.0": 1201.5872, "encoder_q-layer.1": 1396.214, "encoder_q-layer.10": 219.5795, "encoder_q-layer.11": 519.6553, "encoder_q-layer.2": 1625.0828, "encoder_q-layer.3": 1882.531, "encoder_q-layer.4": 1617.3114, "encoder_q-layer.5": 1382.424, "encoder_q-layer.6": 1109.0051, "encoder_q-layer.7": 978.2645, "encoder_q-layer.8": 654.6601, "encoder_q-layer.9": 262.1078, "epoch": 0.16, "inbatch_neg_score": 0.3042, "inbatch_pos_score": 0.8315, "learning_rate": 4.216666666666667e-05, "loss": 4.1947, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.8292, "num_token_overlap": 11.7013, "num_token_query": 31.4655, "num_token_union": 65.2115, "num_word_context": 202.6799, "num_word_doc": 49.8787, "num_word_query": 23.3694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1868.4994, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3044, "query_norm": 1.4052, "queue_k_norm": 1.397, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4655, "sent_len_1": 66.8292, "sent_len_max_0": 127.4737, "sent_len_max_1": 189.8762, "stdk": 0.046, "stdq": 0.0431, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.2103, "doc_norm": 1.395, "encoder_q-embeddings": 569.1805, "encoder_q-layer.0": 407.677, "encoder_q-layer.1": 473.9407, "encoder_q-layer.10": 90.6651, "encoder_q-layer.11": 230.7738, "encoder_q-layer.2": 560.6623, "encoder_q-layer.3": 654.4891, "encoder_q-layer.4": 615.6613, "encoder_q-layer.5": 618.4448, "encoder_q-layer.6": 602.917, "encoder_q-layer.7": 441.4373, "encoder_q-layer.8": 295.7739, "encoder_q-layer.9": 104.2825, "epoch": 0.16, "inbatch_neg_score": 0.3037, "inbatch_pos_score": 0.8306, "learning_rate": 4.211111111111111e-05, "loss": 4.2103, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.8249, "num_token_overlap": 11.6911, "num_token_query": 31.3968, "num_token_union": 65.1408, "num_word_context": 202.2852, "num_word_doc": 49.8327, "num_word_query": 23.3103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 710.3027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.3956, "queue_k_norm": 1.3963, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3968, "sent_len_1": 66.8249, "sent_len_max_0": 127.4675, "sent_len_max_1": 191.9412, "stdk": 0.046, "stdq": 0.0426, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.1882, "doc_norm": 1.3984, "encoder_q-embeddings": 279.8459, "encoder_q-layer.0": 207.1531, "encoder_q-layer.1": 245.6687, "encoder_q-layer.10": 96.4569, "encoder_q-layer.11": 239.2936, "encoder_q-layer.2": 273.772, "encoder_q-layer.3": 288.9573, "encoder_q-layer.4": 246.9283, "encoder_q-layer.5": 215.7104, "encoder_q-layer.6": 205.1035, "encoder_q-layer.7": 201.7823, "encoder_q-layer.8": 152.6414, "encoder_q-layer.9": 95.6758, "epoch": 0.16, "inbatch_neg_score": 0.2967, "inbatch_pos_score": 0.8052, "learning_rate": 4.205555555555556e-05, "loss": 4.1882, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.999, "num_token_overlap": 11.7036, "num_token_query": 31.3902, "num_token_union": 65.2379, "num_word_context": 202.8212, "num_word_doc": 49.956, "num_word_query": 23.3099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 333.6678, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3807, "queue_k_norm": 1.3983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3902, "sent_len_1": 66.999, "sent_len_max_0": 127.54, "sent_len_max_1": 190.69, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.2245, "doc_norm": 1.3962, "encoder_q-embeddings": 1744.806, "encoder_q-layer.0": 1230.9751, "encoder_q-layer.1": 1601.701, "encoder_q-layer.10": 101.3123, "encoder_q-layer.11": 263.774, "encoder_q-layer.2": 1978.8433, "encoder_q-layer.3": 2270.6558, "encoder_q-layer.4": 2326.4204, "encoder_q-layer.5": 1905.2167, "encoder_q-layer.6": 1505.8014, "encoder_q-layer.7": 1457.6787, "encoder_q-layer.8": 877.6096, "encoder_q-layer.9": 228.7836, "epoch": 0.16, "inbatch_neg_score": 0.305, "inbatch_pos_score": 0.8042, "learning_rate": 4.2e-05, "loss": 4.2245, "norm_diff": 0.0409, "norm_loss": 0.0, "num_token_doc": 66.8321, "num_token_overlap": 11.6926, "num_token_query": 31.4321, "num_token_union": 65.1234, "num_word_context": 202.7341, "num_word_doc": 49.8508, "num_word_query": 23.3476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2271.4896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.3552, "queue_k_norm": 1.3951, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4321, "sent_len_1": 66.8321, "sent_len_max_0": 127.4838, "sent_len_max_1": 190.7512, "stdk": 0.0461, "stdq": 0.0413, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.1822, "doc_norm": 1.3956, "encoder_q-embeddings": 426.5174, "encoder_q-layer.0": 321.3613, "encoder_q-layer.1": 324.0213, "encoder_q-layer.10": 98.6177, "encoder_q-layer.11": 263.1242, "encoder_q-layer.2": 345.2461, "encoder_q-layer.3": 356.734, "encoder_q-layer.4": 322.2186, "encoder_q-layer.5": 253.5472, "encoder_q-layer.6": 235.5953, "encoder_q-layer.7": 198.4012, "encoder_q-layer.8": 156.4408, "encoder_q-layer.9": 108.0429, "epoch": 0.16, "inbatch_neg_score": 0.3065, "inbatch_pos_score": 0.8223, "learning_rate": 4.194444444444445e-05, "loss": 4.1822, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.8508, "num_token_overlap": 11.6767, "num_token_query": 31.473, "num_token_union": 65.1933, "num_word_context": 202.5958, "num_word_doc": 49.8626, "num_word_query": 23.3796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 432.8381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3044, "query_norm": 1.3896, "queue_k_norm": 1.3947, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.473, "sent_len_1": 66.8508, "sent_len_max_0": 127.4225, "sent_len_max_1": 189.825, "stdk": 0.046, "stdq": 0.0428, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.2015, "doc_norm": 1.3937, "encoder_q-embeddings": 582.845, "encoder_q-layer.0": 405.7719, "encoder_q-layer.1": 444.9276, "encoder_q-layer.10": 91.3585, "encoder_q-layer.11": 235.136, "encoder_q-layer.2": 496.2406, "encoder_q-layer.3": 516.6314, "encoder_q-layer.4": 501.3707, "encoder_q-layer.5": 428.2476, "encoder_q-layer.6": 389.9235, "encoder_q-layer.7": 334.6195, "encoder_q-layer.8": 214.715, "encoder_q-layer.9": 118.2956, "epoch": 0.16, "inbatch_neg_score": 0.2879, "inbatch_pos_score": 0.7998, "learning_rate": 4.188888888888889e-05, "loss": 4.2015, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.5194, "num_token_overlap": 11.6547, "num_token_query": 31.3965, "num_token_union": 65.0251, "num_word_context": 202.291, "num_word_doc": 49.6364, "num_word_query": 23.3022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 603.6007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.3604, "queue_k_norm": 1.3931, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3965, "sent_len_1": 66.5194, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.5888, "stdk": 0.046, "stdq": 0.0419, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.1975, "doc_norm": 1.3917, "encoder_q-embeddings": 167.0096, "encoder_q-layer.0": 118.9998, "encoder_q-layer.1": 130.1984, "encoder_q-layer.10": 97.8155, "encoder_q-layer.11": 263.4872, "encoder_q-layer.2": 140.8653, "encoder_q-layer.3": 136.0852, "encoder_q-layer.4": 132.8887, "encoder_q-layer.5": 106.3914, "encoder_q-layer.6": 102.6672, "encoder_q-layer.7": 88.8587, "encoder_q-layer.8": 93.4399, "encoder_q-layer.9": 87.7963, "epoch": 0.16, "inbatch_neg_score": 0.2859, "inbatch_pos_score": 0.7944, "learning_rate": 4.183333333333334e-05, "loss": 4.1975, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.5278, "num_token_overlap": 11.664, "num_token_query": 31.3917, "num_token_union": 65.0058, "num_word_context": 202.1148, "num_word_doc": 49.6384, "num_word_query": 23.3156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 212.9534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3722, "queue_k_norm": 1.3938, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3917, "sent_len_1": 66.5278, "sent_len_max_0": 127.3637, "sent_len_max_1": 189.3638, "stdk": 0.046, "stdq": 0.042, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.1862, "doc_norm": 1.4054, "encoder_q-embeddings": 391.6592, "encoder_q-layer.0": 288.9707, "encoder_q-layer.1": 319.2023, "encoder_q-layer.10": 98.7755, "encoder_q-layer.11": 247.7764, "encoder_q-layer.2": 342.821, "encoder_q-layer.3": 350.0613, "encoder_q-layer.4": 344.1983, "encoder_q-layer.5": 348.4965, "encoder_q-layer.6": 315.514, "encoder_q-layer.7": 236.4609, "encoder_q-layer.8": 158.5081, "encoder_q-layer.9": 99.845, "epoch": 0.16, "inbatch_neg_score": 0.2719, "inbatch_pos_score": 0.7993, "learning_rate": 4.177777777777778e-05, "loss": 4.1862, "norm_diff": 0.0336, "norm_loss": 0.0, "num_token_doc": 66.9053, "num_token_overlap": 11.7282, "num_token_query": 31.4069, "num_token_union": 65.1555, "num_word_context": 202.5154, "num_word_doc": 49.9313, "num_word_query": 23.3562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 432.7888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2737, "query_norm": 1.3731, "queue_k_norm": 1.3936, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4069, "sent_len_1": 66.9053, "sent_len_max_0": 127.5362, "sent_len_max_1": 188.3625, "stdk": 0.0466, "stdq": 0.0421, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.1972, "doc_norm": 1.3894, "encoder_q-embeddings": 244.7301, "encoder_q-layer.0": 180.0966, "encoder_q-layer.1": 188.8656, "encoder_q-layer.10": 88.6442, "encoder_q-layer.11": 239.6724, "encoder_q-layer.2": 200.8532, "encoder_q-layer.3": 224.7594, "encoder_q-layer.4": 226.6092, "encoder_q-layer.5": 227.124, "encoder_q-layer.6": 193.1153, "encoder_q-layer.7": 161.1869, "encoder_q-layer.8": 127.2026, "encoder_q-layer.9": 86.8676, "epoch": 0.16, "inbatch_neg_score": 0.2612, "inbatch_pos_score": 0.7803, "learning_rate": 4.172222222222222e-05, "loss": 4.1972, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.7542, "num_token_overlap": 11.7029, "num_token_query": 31.3007, "num_token_union": 65.0567, "num_word_context": 202.7281, "num_word_doc": 49.853, "num_word_query": 23.2545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 287.9938, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.261, "query_norm": 1.3683, "queue_k_norm": 1.3921, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3007, "sent_len_1": 66.7542, "sent_len_max_0": 127.46, "sent_len_max_1": 189.4863, "stdk": 0.046, "stdq": 0.0425, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1919, "doc_norm": 1.3881, "encoder_q-embeddings": 402.7535, "encoder_q-layer.0": 277.417, "encoder_q-layer.1": 348.753, "encoder_q-layer.10": 90.8313, "encoder_q-layer.11": 258.6181, "encoder_q-layer.2": 380.0843, "encoder_q-layer.3": 409.6173, "encoder_q-layer.4": 334.3137, "encoder_q-layer.5": 289.4736, "encoder_q-layer.6": 231.1349, "encoder_q-layer.7": 167.0116, "encoder_q-layer.8": 128.737, "encoder_q-layer.9": 80.1258, "epoch": 0.16, "inbatch_neg_score": 0.2649, "inbatch_pos_score": 0.7578, "learning_rate": 4.166666666666667e-05, "loss": 4.1919, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.8743, "num_token_overlap": 11.7532, "num_token_query": 31.5455, "num_token_union": 65.1964, "num_word_context": 202.5764, "num_word_doc": 49.911, "num_word_query": 23.434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 431.7572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.3537, "queue_k_norm": 1.3902, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5455, "sent_len_1": 66.8743, "sent_len_max_0": 127.5075, "sent_len_max_1": 189.2488, "stdk": 0.046, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2017, "doc_norm": 1.3929, "encoder_q-embeddings": 170.1108, "encoder_q-layer.0": 124.8773, "encoder_q-layer.1": 129.1347, "encoder_q-layer.10": 98.8908, "encoder_q-layer.11": 264.0729, "encoder_q-layer.2": 157.3173, "encoder_q-layer.3": 186.2044, "encoder_q-layer.4": 198.9339, "encoder_q-layer.5": 196.7694, "encoder_q-layer.6": 202.2583, "encoder_q-layer.7": 162.6589, "encoder_q-layer.8": 115.1216, "encoder_q-layer.9": 84.0148, "epoch": 0.16, "inbatch_neg_score": 0.262, "inbatch_pos_score": 0.7803, "learning_rate": 4.1611111111111114e-05, "loss": 4.2017, "norm_diff": 0.0326, "norm_loss": 0.0, "num_token_doc": 66.667, "num_token_overlap": 11.7099, "num_token_query": 31.4766, "num_token_union": 65.0784, "num_word_context": 202.1868, "num_word_doc": 49.7414, "num_word_query": 23.3814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 252.9363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.261, "query_norm": 1.3607, "queue_k_norm": 1.3901, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4766, "sent_len_1": 66.667, "sent_len_max_0": 127.6, "sent_len_max_1": 188.96, "stdk": 0.0462, "stdq": 0.0418, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.1905, "doc_norm": 1.3916, "encoder_q-embeddings": 306.6861, "encoder_q-layer.0": 224.5826, "encoder_q-layer.1": 260.8346, "encoder_q-layer.10": 95.6788, "encoder_q-layer.11": 255.1313, "encoder_q-layer.2": 299.2969, "encoder_q-layer.3": 322.7177, "encoder_q-layer.4": 328.0632, "encoder_q-layer.5": 289.6117, "encoder_q-layer.6": 270.9177, "encoder_q-layer.7": 212.7574, "encoder_q-layer.8": 136.3099, "encoder_q-layer.9": 86.2461, "epoch": 0.16, "inbatch_neg_score": 0.2736, "inbatch_pos_score": 0.7866, "learning_rate": 4.155555555555556e-05, "loss": 4.1905, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 67.0049, "num_token_overlap": 11.6362, "num_token_query": 31.3263, "num_token_union": 65.2622, "num_word_context": 202.5597, "num_word_doc": 50.0144, "num_word_query": 23.2691, "postclip_grad_norm": 1.0, "preclip_grad_norm": 373.7788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.3871, "queue_k_norm": 1.3876, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3263, "sent_len_1": 67.0049, "sent_len_max_0": 127.4, "sent_len_max_1": 190.635, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.2162, "doc_norm": 1.3899, "encoder_q-embeddings": 405.7254, "encoder_q-layer.0": 272.6768, "encoder_q-layer.1": 292.9988, "encoder_q-layer.10": 90.6295, "encoder_q-layer.11": 241.7337, "encoder_q-layer.2": 313.6202, "encoder_q-layer.3": 325.1842, "encoder_q-layer.4": 315.4797, "encoder_q-layer.5": 285.2514, "encoder_q-layer.6": 265.8727, "encoder_q-layer.7": 198.4343, "encoder_q-layer.8": 128.6528, "encoder_q-layer.9": 84.5905, "epoch": 0.16, "inbatch_neg_score": 0.2731, "inbatch_pos_score": 0.8086, "learning_rate": 4.15e-05, "loss": 4.2162, "norm_diff": 0.0211, "norm_loss": 0.0, "num_token_doc": 66.7939, "num_token_overlap": 11.6357, "num_token_query": 31.391, "num_token_union": 65.1373, "num_word_context": 202.408, "num_word_doc": 49.8044, "num_word_query": 23.3238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 409.6231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2739, "query_norm": 1.3689, "queue_k_norm": 1.3856, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.391, "sent_len_1": 66.7939, "sent_len_max_0": 127.3937, "sent_len_max_1": 191.2575, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.1977, "doc_norm": 1.3855, "encoder_q-embeddings": 521.6691, "encoder_q-layer.0": 392.2987, "encoder_q-layer.1": 442.9064, "encoder_q-layer.10": 83.5721, "encoder_q-layer.11": 231.3399, "encoder_q-layer.2": 468.5687, "encoder_q-layer.3": 422.847, "encoder_q-layer.4": 408.8395, "encoder_q-layer.5": 362.2778, "encoder_q-layer.6": 302.6067, "encoder_q-layer.7": 234.3686, "encoder_q-layer.8": 166.0723, "encoder_q-layer.9": 84.9446, "epoch": 0.17, "inbatch_neg_score": 0.2801, "inbatch_pos_score": 0.792, "learning_rate": 4.144444444444445e-05, "loss": 4.1977, "norm_diff": 0.0074, "norm_loss": 0.0, "num_token_doc": 66.7114, "num_token_overlap": 11.6743, "num_token_query": 31.4475, "num_token_union": 65.1105, "num_word_context": 202.3573, "num_word_doc": 49.7373, "num_word_query": 23.3659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 527.6684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2798, "query_norm": 1.3795, "queue_k_norm": 1.3834, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4475, "sent_len_1": 66.7114, "sent_len_max_0": 127.6188, "sent_len_max_1": 190.7188, "stdk": 0.046, "stdq": 0.0418, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.1771, "doc_norm": 1.3847, "encoder_q-embeddings": 462.0117, "encoder_q-layer.0": 351.6619, "encoder_q-layer.1": 370.8759, "encoder_q-layer.10": 87.34, "encoder_q-layer.11": 231.8687, "encoder_q-layer.2": 338.0376, "encoder_q-layer.3": 316.2852, "encoder_q-layer.4": 278.4652, "encoder_q-layer.5": 234.8488, "encoder_q-layer.6": 179.7627, "encoder_q-layer.7": 177.5772, "encoder_q-layer.8": 120.3844, "encoder_q-layer.9": 87.3537, "epoch": 0.17, "inbatch_neg_score": 0.285, "inbatch_pos_score": 0.8105, "learning_rate": 4.138888888888889e-05, "loss": 4.1771, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 67.0886, "num_token_overlap": 11.7186, "num_token_query": 31.4404, "num_token_union": 65.3109, "num_word_context": 202.4801, "num_word_doc": 50.085, "num_word_query": 23.3463, "postclip_grad_norm": 1.0, "preclip_grad_norm": 430.9641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.383, "queue_k_norm": 1.3834, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4404, "sent_len_1": 67.0886, "sent_len_max_0": 127.445, "sent_len_max_1": 190.8162, "stdk": 0.046, "stdq": 0.0423, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.2098, "doc_norm": 1.384, "encoder_q-embeddings": 939.3348, "encoder_q-layer.0": 651.3391, "encoder_q-layer.1": 740.3123, "encoder_q-layer.10": 101.8122, "encoder_q-layer.11": 265.1147, "encoder_q-layer.2": 818.07, "encoder_q-layer.3": 824.3462, "encoder_q-layer.4": 812.5441, "encoder_q-layer.5": 624.0868, "encoder_q-layer.6": 461.4283, "encoder_q-layer.7": 360.8377, "encoder_q-layer.8": 202.2941, "encoder_q-layer.9": 96.9792, "epoch": 0.17, "inbatch_neg_score": 0.2852, "inbatch_pos_score": 0.7876, "learning_rate": 4.133333333333333e-05, "loss": 4.2098, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 67.0046, "num_token_overlap": 11.6404, "num_token_query": 31.302, "num_token_union": 65.2121, "num_word_context": 202.6382, "num_word_doc": 50.0048, "num_word_query": 23.2254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 941.8362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3556, "queue_k_norm": 1.3836, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.302, "sent_len_1": 67.0046, "sent_len_max_0": 127.5113, "sent_len_max_1": 189.21, "stdk": 0.0459, "stdq": 0.0415, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.1849, "doc_norm": 1.3813, "encoder_q-embeddings": 205.8179, "encoder_q-layer.0": 143.9097, "encoder_q-layer.1": 145.6739, "encoder_q-layer.10": 85.515, "encoder_q-layer.11": 248.0205, "encoder_q-layer.2": 166.6535, "encoder_q-layer.3": 161.6195, "encoder_q-layer.4": 175.0159, "encoder_q-layer.5": 157.45, "encoder_q-layer.6": 149.3361, "encoder_q-layer.7": 142.3383, "encoder_q-layer.8": 116.9373, "encoder_q-layer.9": 77.3172, "epoch": 0.17, "inbatch_neg_score": 0.2892, "inbatch_pos_score": 0.8076, "learning_rate": 4.127777777777778e-05, "loss": 4.1849, "norm_diff": 0.0113, "norm_loss": 0.0, "num_token_doc": 66.3791, "num_token_overlap": 11.6289, "num_token_query": 31.337, "num_token_union": 64.9049, "num_word_context": 202.153, "num_word_doc": 49.5929, "num_word_query": 23.2723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 244.8981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2874, "query_norm": 1.3832, "queue_k_norm": 1.3838, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.337, "sent_len_1": 66.3791, "sent_len_max_0": 127.4838, "sent_len_max_1": 187.1275, "stdk": 0.0458, "stdq": 0.0421, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.1842, "doc_norm": 1.3856, "encoder_q-embeddings": 2223.2617, "encoder_q-layer.0": 1662.1426, "encoder_q-layer.1": 1799.9235, "encoder_q-layer.10": 83.9687, "encoder_q-layer.11": 236.1362, "encoder_q-layer.2": 2172.8296, "encoder_q-layer.3": 2289.6018, "encoder_q-layer.4": 1758.6975, "encoder_q-layer.5": 1316.1721, "encoder_q-layer.6": 967.9765, "encoder_q-layer.7": 600.7329, "encoder_q-layer.8": 298.1808, "encoder_q-layer.9": 101.6376, "epoch": 0.17, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 0.8169, "learning_rate": 4.1222222222222224e-05, "loss": 4.1842, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.6708, "num_token_overlap": 11.6433, "num_token_query": 31.3324, "num_token_union": 65.0588, "num_word_context": 201.9831, "num_word_doc": 49.7532, "num_word_query": 23.2554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2223.4533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3979, "queue_k_norm": 1.383, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3324, "sent_len_1": 66.6708, "sent_len_max_0": 127.6525, "sent_len_max_1": 188.9787, "stdk": 0.0459, "stdq": 0.0423, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.1952, "doc_norm": 1.386, "encoder_q-embeddings": 363.4986, "encoder_q-layer.0": 257.7398, "encoder_q-layer.1": 302.4518, "encoder_q-layer.10": 82.2162, "encoder_q-layer.11": 221.23, "encoder_q-layer.2": 380.5837, "encoder_q-layer.3": 419.5175, "encoder_q-layer.4": 378.7433, "encoder_q-layer.5": 268.9865, "encoder_q-layer.6": 273.9491, "encoder_q-layer.7": 246.4219, "encoder_q-layer.8": 157.7739, "encoder_q-layer.9": 85.429, "epoch": 0.17, "inbatch_neg_score": 0.2906, "inbatch_pos_score": 0.8188, "learning_rate": 4.116666666666667e-05, "loss": 4.1952, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.7565, "num_token_overlap": 11.6466, "num_token_query": 31.3706, "num_token_union": 65.1336, "num_word_context": 202.8036, "num_word_doc": 49.8581, "num_word_query": 23.306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 432.2391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2896, "query_norm": 1.39, "queue_k_norm": 1.3819, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3706, "sent_len_1": 66.7565, "sent_len_max_0": 127.485, "sent_len_max_1": 188.7025, "stdk": 0.046, "stdq": 0.0422, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.2072, "doc_norm": 1.3836, "encoder_q-embeddings": 536.3772, "encoder_q-layer.0": 354.0739, "encoder_q-layer.1": 423.4286, "encoder_q-layer.10": 103.9853, "encoder_q-layer.11": 276.2471, "encoder_q-layer.2": 436.9636, "encoder_q-layer.3": 432.9916, "encoder_q-layer.4": 452.5049, "encoder_q-layer.5": 399.9007, "encoder_q-layer.6": 369.1313, "encoder_q-layer.7": 295.2254, "encoder_q-layer.8": 202.6157, "encoder_q-layer.9": 105.5191, "epoch": 0.17, "inbatch_neg_score": 0.3083, "inbatch_pos_score": 0.8145, "learning_rate": 4.111111111111111e-05, "loss": 4.2072, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.5177, "num_token_overlap": 11.5574, "num_token_query": 31.043, "num_token_union": 64.8513, "num_word_context": 201.8762, "num_word_doc": 49.6478, "num_word_query": 23.0441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 555.0147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3064, "query_norm": 1.4059, "queue_k_norm": 1.3846, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.043, "sent_len_1": 66.5177, "sent_len_max_0": 127.3187, "sent_len_max_1": 189.9888, "stdk": 0.0459, "stdq": 0.0427, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.1958, "doc_norm": 1.3873, "encoder_q-embeddings": 1543.2855, "encoder_q-layer.0": 1163.9069, "encoder_q-layer.1": 1273.1628, "encoder_q-layer.10": 97.0484, "encoder_q-layer.11": 234.6158, "encoder_q-layer.2": 1252.4594, "encoder_q-layer.3": 1292.2273, "encoder_q-layer.4": 1192.7742, "encoder_q-layer.5": 953.5977, "encoder_q-layer.6": 734.4058, "encoder_q-layer.7": 609.3099, "encoder_q-layer.8": 304.5172, "encoder_q-layer.9": 156.0606, "epoch": 0.17, "inbatch_neg_score": 0.2876, "inbatch_pos_score": 0.8145, "learning_rate": 4.105555555555556e-05, "loss": 4.1958, "norm_diff": 0.0162, "norm_loss": 0.0, "num_token_doc": 66.6514, "num_token_overlap": 11.7046, "num_token_query": 31.4291, "num_token_union": 65.0479, "num_word_context": 202.0716, "num_word_doc": 49.7406, "num_word_query": 23.352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1534.8339, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2878, "query_norm": 1.3727, "queue_k_norm": 1.3853, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4291, "sent_len_1": 66.6514, "sent_len_max_0": 127.5875, "sent_len_max_1": 188.48, "stdk": 0.0461, "stdq": 0.0424, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.2029, "doc_norm": 1.3848, "encoder_q-embeddings": 450.8732, "encoder_q-layer.0": 311.7996, "encoder_q-layer.1": 351.1849, "encoder_q-layer.10": 215.1338, "encoder_q-layer.11": 498.3966, "encoder_q-layer.2": 356.325, "encoder_q-layer.3": 379.3068, "encoder_q-layer.4": 357.2725, "encoder_q-layer.5": 396.7375, "encoder_q-layer.6": 363.7623, "encoder_q-layer.7": 308.8778, "encoder_q-layer.8": 276.5492, "encoder_q-layer.9": 202.2856, "epoch": 0.17, "inbatch_neg_score": 0.2702, "inbatch_pos_score": 0.7939, "learning_rate": 4.1e-05, "loss": 4.2029, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.8024, "num_token_overlap": 11.6598, "num_token_query": 31.3612, "num_token_union": 65.1406, "num_word_context": 202.5772, "num_word_doc": 49.885, "num_word_query": 23.2756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 532.2533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2695, "query_norm": 1.4216, "queue_k_norm": 1.3846, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3612, "sent_len_1": 66.8024, "sent_len_max_0": 127.215, "sent_len_max_1": 187.6387, "stdk": 0.046, "stdq": 0.0442, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.19, "doc_norm": 1.3749, "encoder_q-embeddings": 614.4492, "encoder_q-layer.0": 420.9924, "encoder_q-layer.1": 501.019, "encoder_q-layer.10": 197.3931, "encoder_q-layer.11": 515.4859, "encoder_q-layer.2": 569.9554, "encoder_q-layer.3": 604.6579, "encoder_q-layer.4": 617.3718, "encoder_q-layer.5": 615.2799, "encoder_q-layer.6": 535.7249, "encoder_q-layer.7": 434.5073, "encoder_q-layer.8": 321.5084, "encoder_q-layer.9": 189.1719, "epoch": 0.17, "inbatch_neg_score": 0.2541, "inbatch_pos_score": 0.752, "learning_rate": 4.094444444444445e-05, "loss": 4.19, "norm_diff": 0.0073, "norm_loss": 0.0, "num_token_doc": 66.8544, "num_token_overlap": 11.6523, "num_token_query": 31.2967, "num_token_union": 65.138, "num_word_context": 201.9921, "num_word_doc": 49.8785, "num_word_query": 23.2295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 749.6174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2539, "query_norm": 1.3693, "queue_k_norm": 1.3846, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2967, "sent_len_1": 66.8544, "sent_len_max_0": 127.28, "sent_len_max_1": 189.1138, "stdk": 0.0456, "stdq": 0.0419, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.2233, "doc_norm": 1.3802, "encoder_q-embeddings": 4139.9312, "encoder_q-layer.0": 3045.604, "encoder_q-layer.1": 3410.0239, "encoder_q-layer.10": 177.7775, "encoder_q-layer.11": 485.0141, "encoder_q-layer.2": 3884.0669, "encoder_q-layer.3": 4122.2148, "encoder_q-layer.4": 4045.6125, "encoder_q-layer.5": 3718.8474, "encoder_q-layer.6": 3308.3767, "encoder_q-layer.7": 2851.0544, "encoder_q-layer.8": 999.812, "encoder_q-layer.9": 192.8514, "epoch": 0.17, "inbatch_neg_score": 0.2476, "inbatch_pos_score": 0.7637, "learning_rate": 4.088888888888889e-05, "loss": 4.2233, "norm_diff": 0.0355, "norm_loss": 0.0, "num_token_doc": 66.7831, "num_token_overlap": 11.614, "num_token_query": 31.264, "num_token_union": 65.1275, "num_word_context": 202.3345, "num_word_doc": 49.8144, "num_word_query": 23.212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4588.9925, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2461, "query_norm": 1.4158, "queue_k_norm": 1.3842, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.264, "sent_len_1": 66.7831, "sent_len_max_0": 127.4163, "sent_len_max_1": 189.9688, "stdk": 0.0459, "stdq": 0.0434, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.1817, "doc_norm": 1.3955, "encoder_q-embeddings": 690.4033, "encoder_q-layer.0": 499.7073, "encoder_q-layer.1": 540.7253, "encoder_q-layer.10": 180.9896, "encoder_q-layer.11": 466.4092, "encoder_q-layer.2": 609.1183, "encoder_q-layer.3": 620.3448, "encoder_q-layer.4": 580.7769, "encoder_q-layer.5": 583.9259, "encoder_q-layer.6": 524.3043, "encoder_q-layer.7": 483.9178, "encoder_q-layer.8": 297.6306, "encoder_q-layer.9": 168.2752, "epoch": 0.17, "inbatch_neg_score": 0.2739, "inbatch_pos_score": 0.7939, "learning_rate": 4.0833333333333334e-05, "loss": 4.1817, "norm_diff": 0.0173, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 11.6542, "num_token_query": 31.3315, "num_token_union": 65.1519, "num_word_context": 202.3835, "num_word_doc": 49.8425, "num_word_query": 23.2538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 770.5337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2717, "query_norm": 1.4122, "queue_k_norm": 1.3853, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3315, "sent_len_1": 66.7393, "sent_len_max_0": 127.4775, "sent_len_max_1": 187.445, "stdk": 0.0465, "stdq": 0.043, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.1906, "doc_norm": 1.3885, "encoder_q-embeddings": 778.9492, "encoder_q-layer.0": 513.9444, "encoder_q-layer.1": 532.1317, "encoder_q-layer.10": 163.3802, "encoder_q-layer.11": 432.571, "encoder_q-layer.2": 568.2656, "encoder_q-layer.3": 583.6715, "encoder_q-layer.4": 600.7084, "encoder_q-layer.5": 633.4572, "encoder_q-layer.6": 513.4958, "encoder_q-layer.7": 401.6326, "encoder_q-layer.8": 266.9942, "encoder_q-layer.9": 163.3564, "epoch": 0.17, "inbatch_neg_score": 0.2599, "inbatch_pos_score": 0.7832, "learning_rate": 4.0777777777777783e-05, "loss": 4.1906, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.5532, "num_token_overlap": 11.6098, "num_token_query": 31.2316, "num_token_union": 64.9177, "num_word_context": 202.0387, "num_word_doc": 49.6694, "num_word_query": 23.1914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 779.9709, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.4041, "queue_k_norm": 1.384, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2316, "sent_len_1": 66.5532, "sent_len_max_0": 127.5088, "sent_len_max_1": 189.5975, "stdk": 0.0463, "stdq": 0.043, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.1683, "doc_norm": 1.3842, "encoder_q-embeddings": 833.2146, "encoder_q-layer.0": 662.4952, "encoder_q-layer.1": 696.4337, "encoder_q-layer.10": 164.8158, "encoder_q-layer.11": 456.2198, "encoder_q-layer.2": 597.7037, "encoder_q-layer.3": 519.751, "encoder_q-layer.4": 470.6225, "encoder_q-layer.5": 436.9337, "encoder_q-layer.6": 421.2173, "encoder_q-layer.7": 359.8272, "encoder_q-layer.8": 283.1669, "encoder_q-layer.9": 164.7079, "epoch": 0.17, "inbatch_neg_score": 0.2868, "inbatch_pos_score": 0.8008, "learning_rate": 4.0722222222222226e-05, "loss": 4.1683, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.7521, "num_token_overlap": 11.6618, "num_token_query": 31.4197, "num_token_union": 65.144, "num_word_context": 202.0105, "num_word_doc": 49.7655, "num_word_query": 23.3529, "postclip_grad_norm": 1.0, "preclip_grad_norm": 793.8195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3686, "queue_k_norm": 1.3838, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4197, "sent_len_1": 66.7521, "sent_len_max_0": 127.515, "sent_len_max_1": 189.2788, "stdk": 0.0461, "stdq": 0.0414, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.181, "doc_norm": 1.3859, "encoder_q-embeddings": 580.5507, "encoder_q-layer.0": 377.3372, "encoder_q-layer.1": 409.8848, "encoder_q-layer.10": 167.7701, "encoder_q-layer.11": 509.2891, "encoder_q-layer.2": 464.3795, "encoder_q-layer.3": 496.7404, "encoder_q-layer.4": 516.5732, "encoder_q-layer.5": 473.8054, "encoder_q-layer.6": 513.4516, "encoder_q-layer.7": 513.2376, "encoder_q-layer.8": 363.1783, "encoder_q-layer.9": 174.0749, "epoch": 0.17, "inbatch_neg_score": 0.3023, "inbatch_pos_score": 0.8086, "learning_rate": 4.066666666666667e-05, "loss": 4.181, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 66.7348, "num_token_overlap": 11.6634, "num_token_query": 31.3809, "num_token_union": 65.1459, "num_word_context": 202.4435, "num_word_doc": 49.8437, "num_word_query": 23.3005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 683.6124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.3877, "queue_k_norm": 1.3845, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3809, "sent_len_1": 66.7348, "sent_len_max_0": 127.4537, "sent_len_max_1": 187.0263, "stdk": 0.0461, "stdq": 0.042, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1827, "doc_norm": 1.3824, "encoder_q-embeddings": 834.9988, "encoder_q-layer.0": 618.7255, "encoder_q-layer.1": 582.5278, "encoder_q-layer.10": 173.5836, "encoder_q-layer.11": 505.3058, "encoder_q-layer.2": 650.751, "encoder_q-layer.3": 633.434, "encoder_q-layer.4": 646.2616, "encoder_q-layer.5": 563.5821, "encoder_q-layer.6": 394.3053, "encoder_q-layer.7": 311.059, "encoder_q-layer.8": 248.2535, "encoder_q-layer.9": 165.3921, "epoch": 0.18, "inbatch_neg_score": 0.288, "inbatch_pos_score": 0.791, "learning_rate": 4.061111111111111e-05, "loss": 4.1827, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.74, "num_token_overlap": 11.7049, "num_token_query": 31.3594, "num_token_union": 65.0765, "num_word_context": 202.4184, "num_word_doc": 49.785, "num_word_query": 23.2853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 823.4016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2871, "query_norm": 1.3814, "queue_k_norm": 1.3842, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3594, "sent_len_1": 66.74, "sent_len_max_0": 127.5913, "sent_len_max_1": 191.0437, "stdk": 0.046, "stdq": 0.0423, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1609, "doc_norm": 1.3822, "encoder_q-embeddings": 3428.2852, "encoder_q-layer.0": 2357.062, "encoder_q-layer.1": 2342.6367, "encoder_q-layer.10": 181.7662, "encoder_q-layer.11": 447.8769, "encoder_q-layer.2": 2429.7979, "encoder_q-layer.3": 2575.8113, "encoder_q-layer.4": 2717.7881, "encoder_q-layer.5": 2510.5371, "encoder_q-layer.6": 2320.7947, "encoder_q-layer.7": 1564.4254, "encoder_q-layer.8": 954.8923, "encoder_q-layer.9": 253.5028, "epoch": 0.18, "inbatch_neg_score": 0.2762, "inbatch_pos_score": 0.8076, "learning_rate": 4.055555555555556e-05, "loss": 4.1609, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.9071, "num_token_overlap": 11.7363, "num_token_query": 31.4843, "num_token_union": 65.1983, "num_word_context": 202.6018, "num_word_doc": 49.8964, "num_word_query": 23.3688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3337.8878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.3776, "queue_k_norm": 1.3855, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4843, "sent_len_1": 66.9071, "sent_len_max_0": 127.39, "sent_len_max_1": 190.3487, "stdk": 0.0461, "stdq": 0.0426, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.1605, "doc_norm": 1.3866, "encoder_q-embeddings": 579.9479, "encoder_q-layer.0": 427.5442, "encoder_q-layer.1": 468.1582, "encoder_q-layer.10": 172.1223, "encoder_q-layer.11": 464.1439, "encoder_q-layer.2": 462.6757, "encoder_q-layer.3": 454.1694, "encoder_q-layer.4": 451.8773, "encoder_q-layer.5": 422.6661, "encoder_q-layer.6": 388.5994, "encoder_q-layer.7": 327.6017, "encoder_q-layer.8": 227.118, "encoder_q-layer.9": 162.4901, "epoch": 0.18, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.8135, "learning_rate": 4.05e-05, "loss": 4.1605, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.6675, "num_token_overlap": 11.6122, "num_token_query": 31.2664, "num_token_union": 65.0115, "num_word_context": 202.3846, "num_word_doc": 49.7295, "num_word_query": 23.224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 620.0432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.3678, "queue_k_norm": 1.387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2664, "sent_len_1": 66.6675, "sent_len_max_0": 127.2037, "sent_len_max_1": 188.9475, "stdk": 0.0462, "stdq": 0.0418, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.1713, "doc_norm": 1.3925, "encoder_q-embeddings": 756.6973, "encoder_q-layer.0": 560.4373, "encoder_q-layer.1": 531.3782, "encoder_q-layer.10": 178.4251, "encoder_q-layer.11": 473.5962, "encoder_q-layer.2": 597.7977, "encoder_q-layer.3": 640.7684, "encoder_q-layer.4": 632.8564, "encoder_q-layer.5": 574.1031, "encoder_q-layer.6": 462.592, "encoder_q-layer.7": 339.773, "encoder_q-layer.8": 224.2662, "encoder_q-layer.9": 163.4934, "epoch": 0.18, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 0.8286, "learning_rate": 4.0444444444444444e-05, "loss": 4.1713, "norm_diff": 0.014, "norm_loss": 0.0, "num_token_doc": 66.6455, "num_token_overlap": 11.6237, "num_token_query": 31.2642, "num_token_union": 64.9663, "num_word_context": 202.0983, "num_word_doc": 49.7463, "num_word_query": 23.2056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 779.7135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3005, "query_norm": 1.3842, "queue_k_norm": 1.3852, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2642, "sent_len_1": 66.6455, "sent_len_max_0": 127.695, "sent_len_max_1": 190.02, "stdk": 0.0464, "stdq": 0.0426, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.1782, "doc_norm": 1.381, "encoder_q-embeddings": 1020.8685, "encoder_q-layer.0": 773.2714, "encoder_q-layer.1": 823.0741, "encoder_q-layer.10": 169.4283, "encoder_q-layer.11": 477.0157, "encoder_q-layer.2": 836.5273, "encoder_q-layer.3": 812.6441, "encoder_q-layer.4": 690.7101, "encoder_q-layer.5": 557.7065, "encoder_q-layer.6": 412.0673, "encoder_q-layer.7": 375.5994, "encoder_q-layer.8": 284.1892, "encoder_q-layer.9": 170.1553, "epoch": 0.18, "inbatch_neg_score": 0.3071, "inbatch_pos_score": 0.8159, "learning_rate": 4.038888888888889e-05, "loss": 4.1782, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.8569, "num_token_overlap": 11.7018, "num_token_query": 31.518, "num_token_union": 65.2283, "num_word_context": 202.6596, "num_word_doc": 49.8811, "num_word_query": 23.3929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 984.9406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3037, "query_norm": 1.3722, "queue_k_norm": 1.3863, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.518, "sent_len_1": 66.8569, "sent_len_max_0": 127.6538, "sent_len_max_1": 191.0213, "stdk": 0.0459, "stdq": 0.0417, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1783, "doc_norm": 1.3885, "encoder_q-embeddings": 977.9401, "encoder_q-layer.0": 700.2694, "encoder_q-layer.1": 780.6597, "encoder_q-layer.10": 203.047, "encoder_q-layer.11": 502.914, "encoder_q-layer.2": 794.8619, "encoder_q-layer.3": 698.2296, "encoder_q-layer.4": 633.2249, "encoder_q-layer.5": 556.7201, "encoder_q-layer.6": 464.4274, "encoder_q-layer.7": 349.8059, "encoder_q-layer.8": 259.0374, "encoder_q-layer.9": 172.4934, "epoch": 0.18, "inbatch_neg_score": 0.3047, "inbatch_pos_score": 0.8164, "learning_rate": 4.0333333333333336e-05, "loss": 4.1783, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 67.1502, "num_token_overlap": 11.6303, "num_token_query": 31.3214, "num_token_union": 65.357, "num_word_context": 202.5993, "num_word_doc": 50.0718, "num_word_query": 23.2525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 935.2842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3037, "query_norm": 1.3619, "queue_k_norm": 1.3852, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3214, "sent_len_1": 67.1502, "sent_len_max_0": 127.5487, "sent_len_max_1": 190.2425, "stdk": 0.0462, "stdq": 0.0418, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.1888, "doc_norm": 1.3899, "encoder_q-embeddings": 233.4903, "encoder_q-layer.0": 163.694, "encoder_q-layer.1": 177.7944, "encoder_q-layer.10": 187.9109, "encoder_q-layer.11": 441.7175, "encoder_q-layer.2": 199.0381, "encoder_q-layer.3": 213.1391, "encoder_q-layer.4": 202.8982, "encoder_q-layer.5": 217.3717, "encoder_q-layer.6": 208.3672, "encoder_q-layer.7": 178.6441, "encoder_q-layer.8": 195.1974, "encoder_q-layer.9": 159.4604, "epoch": 0.18, "inbatch_neg_score": 0.3028, "inbatch_pos_score": 0.8252, "learning_rate": 4.027777777777778e-05, "loss": 4.1888, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.7858, "num_token_overlap": 11.6633, "num_token_query": 31.3475, "num_token_union": 65.116, "num_word_context": 202.3498, "num_word_doc": 49.8258, "num_word_query": 23.2636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 339.7991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3032, "query_norm": 1.3862, "queue_k_norm": 1.3856, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3475, "sent_len_1": 66.7858, "sent_len_max_0": 127.4663, "sent_len_max_1": 190.6962, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1701, "doc_norm": 1.3929, "encoder_q-embeddings": 797.616, "encoder_q-layer.0": 583.75, "encoder_q-layer.1": 624.7178, "encoder_q-layer.10": 182.2293, "encoder_q-layer.11": 460.9167, "encoder_q-layer.2": 723.6121, "encoder_q-layer.3": 615.642, "encoder_q-layer.4": 588.9063, "encoder_q-layer.5": 534.0607, "encoder_q-layer.6": 456.6407, "encoder_q-layer.7": 342.3184, "encoder_q-layer.8": 275.6661, "encoder_q-layer.9": 178.4731, "epoch": 0.18, "inbatch_neg_score": 0.3045, "inbatch_pos_score": 0.8218, "learning_rate": 4.022222222222222e-05, "loss": 4.1701, "norm_diff": 0.0172, "norm_loss": 0.0, "num_token_doc": 66.656, "num_token_overlap": 11.7108, "num_token_query": 31.5235, "num_token_union": 65.1019, "num_word_context": 202.3268, "num_word_doc": 49.7673, "num_word_query": 23.4146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 829.7448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.3838, "queue_k_norm": 1.3854, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5235, "sent_len_1": 66.656, "sent_len_max_0": 127.35, "sent_len_max_1": 190.3325, "stdk": 0.0463, "stdq": 0.0423, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1653, "doc_norm": 1.3846, "encoder_q-embeddings": 543.7017, "encoder_q-layer.0": 399.0209, "encoder_q-layer.1": 411.8991, "encoder_q-layer.10": 182.2989, "encoder_q-layer.11": 481.1927, "encoder_q-layer.2": 402.8851, "encoder_q-layer.3": 393.1947, "encoder_q-layer.4": 404.8341, "encoder_q-layer.5": 373.3493, "encoder_q-layer.6": 324.8568, "encoder_q-layer.7": 308.351, "encoder_q-layer.8": 276.9642, "encoder_q-layer.9": 171.9257, "epoch": 0.18, "inbatch_neg_score": 0.3079, "inbatch_pos_score": 0.8115, "learning_rate": 4.016666666666667e-05, "loss": 4.1653, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.5675, "num_token_overlap": 11.6779, "num_token_query": 31.4349, "num_token_union": 65.0336, "num_word_context": 202.198, "num_word_doc": 49.6969, "num_word_query": 23.34, "postclip_grad_norm": 1.0, "preclip_grad_norm": 577.3332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.3735, "queue_k_norm": 1.386, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4349, "sent_len_1": 66.5675, "sent_len_max_0": 127.4075, "sent_len_max_1": 189.1738, "stdk": 0.046, "stdq": 0.042, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.1516, "doc_norm": 1.3859, "encoder_q-embeddings": 773.6476, "encoder_q-layer.0": 559.6675, "encoder_q-layer.1": 645.0304, "encoder_q-layer.10": 181.9909, "encoder_q-layer.11": 455.4279, "encoder_q-layer.2": 661.5242, "encoder_q-layer.3": 704.7736, "encoder_q-layer.4": 568.7114, "encoder_q-layer.5": 455.044, "encoder_q-layer.6": 360.06, "encoder_q-layer.7": 282.7043, "encoder_q-layer.8": 219.1549, "encoder_q-layer.9": 167.7123, "epoch": 0.18, "inbatch_neg_score": 0.3122, "inbatch_pos_score": 0.834, "learning_rate": 4.011111111111111e-05, "loss": 4.1516, "norm_diff": 0.026, "norm_loss": 0.0, "num_token_doc": 67.0137, "num_token_overlap": 11.6688, "num_token_query": 31.4501, "num_token_union": 65.3288, "num_word_context": 202.6066, "num_word_doc": 49.974, "num_word_query": 23.3486, "postclip_grad_norm": 1.0, "preclip_grad_norm": 783.3481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3115, "query_norm": 1.361, "queue_k_norm": 1.3877, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4501, "sent_len_1": 67.0137, "sent_len_max_0": 127.4725, "sent_len_max_1": 191.585, "stdk": 0.046, "stdq": 0.0415, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.1434, "doc_norm": 1.3898, "encoder_q-embeddings": 592.3532, "encoder_q-layer.0": 464.1314, "encoder_q-layer.1": 444.1732, "encoder_q-layer.10": 178.2639, "encoder_q-layer.11": 466.1064, "encoder_q-layer.2": 439.843, "encoder_q-layer.3": 435.7037, "encoder_q-layer.4": 399.2469, "encoder_q-layer.5": 369.9823, "encoder_q-layer.6": 371.8457, "encoder_q-layer.7": 339.8945, "encoder_q-layer.8": 363.3839, "encoder_q-layer.9": 236.7835, "epoch": 0.18, "inbatch_neg_score": 0.3074, "inbatch_pos_score": 0.8359, "learning_rate": 4.0055555555555554e-05, "loss": 4.1434, "norm_diff": 0.0081, "norm_loss": 0.0, "num_token_doc": 66.9445, "num_token_overlap": 11.7287, "num_token_query": 31.4277, "num_token_union": 65.1843, "num_word_context": 202.5595, "num_word_doc": 49.983, "num_word_query": 23.3611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 625.15, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.3967, "queue_k_norm": 1.3895, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4277, "sent_len_1": 66.9445, "sent_len_max_0": 127.5738, "sent_len_max_1": 188.8325, "stdk": 0.0462, "stdq": 0.0431, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.1437, "doc_norm": 1.3827, "encoder_q-embeddings": 398.0502, "encoder_q-layer.0": 268.5862, "encoder_q-layer.1": 314.1215, "encoder_q-layer.10": 214.8101, "encoder_q-layer.11": 517.2015, "encoder_q-layer.2": 314.9379, "encoder_q-layer.3": 318.4446, "encoder_q-layer.4": 302.4564, "encoder_q-layer.5": 279.7844, "encoder_q-layer.6": 266.1086, "encoder_q-layer.7": 228.1966, "encoder_q-layer.8": 218.8242, "encoder_q-layer.9": 164.0043, "epoch": 0.18, "inbatch_neg_score": 0.296, "inbatch_pos_score": 0.8096, "learning_rate": 4e-05, "loss": 4.1437, "norm_diff": 0.0079, "norm_loss": 0.0, "num_token_doc": 66.7806, "num_token_overlap": 11.6651, "num_token_query": 31.2285, "num_token_union": 65.018, "num_word_context": 202.0766, "num_word_doc": 49.8082, "num_word_query": 23.1894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 470.9327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.377, "queue_k_norm": 1.3894, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2285, "sent_len_1": 66.7806, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.235, "stdk": 0.0458, "stdq": 0.0426, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 4.1582, "doc_norm": 1.3954, "encoder_q-embeddings": 924.0653, "encoder_q-layer.0": 762.3678, "encoder_q-layer.1": 847.5322, "encoder_q-layer.10": 199.2793, "encoder_q-layer.11": 555.918, "encoder_q-layer.2": 905.2646, "encoder_q-layer.3": 818.3694, "encoder_q-layer.4": 703.8615, "encoder_q-layer.5": 646.5668, "encoder_q-layer.6": 633.5859, "encoder_q-layer.7": 477.2415, "encoder_q-layer.8": 309.0574, "encoder_q-layer.9": 189.2502, "epoch": 0.18, "inbatch_neg_score": 0.296, "inbatch_pos_score": 0.8223, "learning_rate": 3.9944444444444446e-05, "loss": 4.1582, "norm_diff": 0.0349, "norm_loss": 0.0, "num_token_doc": 66.8488, "num_token_overlap": 11.6946, "num_token_query": 31.5153, "num_token_union": 65.2634, "num_word_context": 202.1474, "num_word_doc": 49.8737, "num_word_query": 23.4167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1015.6277, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2959, "query_norm": 1.3605, "queue_k_norm": 1.3898, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5153, "sent_len_1": 66.8488, "sent_len_max_0": 127.7087, "sent_len_max_1": 187.7525, "stdk": 0.0464, "stdq": 0.0414, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.122, "doc_norm": 1.3922, "encoder_q-embeddings": 1814.0017, "encoder_q-layer.0": 1335.8929, "encoder_q-layer.1": 1294.9524, "encoder_q-layer.10": 353.7628, "encoder_q-layer.11": 925.1452, "encoder_q-layer.2": 1505.053, "encoder_q-layer.3": 1360.4197, "encoder_q-layer.4": 1413.047, "encoder_q-layer.5": 1626.2206, "encoder_q-layer.6": 1965.396, "encoder_q-layer.7": 2114.3525, "encoder_q-layer.8": 1440.4174, "encoder_q-layer.9": 435.4533, "epoch": 0.18, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.8105, "learning_rate": 3.9888888888888895e-05, "loss": 4.122, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.8531, "num_token_overlap": 11.7035, "num_token_query": 31.4442, "num_token_union": 65.2348, "num_word_context": 202.3453, "num_word_doc": 49.8832, "num_word_query": 23.3524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2189.1295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3579, "queue_k_norm": 1.3902, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4442, "sent_len_1": 66.8531, "sent_len_max_0": 127.6188, "sent_len_max_1": 188.455, "stdk": 0.0462, "stdq": 0.0418, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.1756, "doc_norm": 1.3791, "encoder_q-embeddings": 935.2111, "encoder_q-layer.0": 613.8918, "encoder_q-layer.1": 696.9637, "encoder_q-layer.10": 361.6692, "encoder_q-layer.11": 958.0152, "encoder_q-layer.2": 746.4953, "encoder_q-layer.3": 917.424, "encoder_q-layer.4": 969.2, "encoder_q-layer.5": 860.6006, "encoder_q-layer.6": 733.0739, "encoder_q-layer.7": 713.6273, "encoder_q-layer.8": 619.8663, "encoder_q-layer.9": 372.0376, "epoch": 0.18, "inbatch_neg_score": 0.2807, "inbatch_pos_score": 0.79, "learning_rate": 3.983333333333333e-05, "loss": 4.1756, "norm_diff": 0.0318, "norm_loss": 0.0, "num_token_doc": 66.6292, "num_token_overlap": 11.6291, "num_token_query": 31.3478, "num_token_union": 65.0646, "num_word_context": 201.91, "num_word_doc": 49.7227, "num_word_query": 23.2853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1140.2595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.3494, "queue_k_norm": 1.3887, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3478, "sent_len_1": 66.6292, "sent_len_max_0": 127.595, "sent_len_max_1": 190.425, "stdk": 0.0457, "stdq": 0.0418, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.1328, "doc_norm": 1.3879, "encoder_q-embeddings": 501.547, "encoder_q-layer.0": 357.1512, "encoder_q-layer.1": 355.4201, "encoder_q-layer.10": 354.9784, "encoder_q-layer.11": 962.2771, "encoder_q-layer.2": 386.7368, "encoder_q-layer.3": 375.1669, "encoder_q-layer.4": 330.9015, "encoder_q-layer.5": 322.92, "encoder_q-layer.6": 339.7694, "encoder_q-layer.7": 337.6964, "encoder_q-layer.8": 366.4543, "encoder_q-layer.9": 314.7134, "epoch": 0.18, "inbatch_neg_score": 0.2715, "inbatch_pos_score": 0.7979, "learning_rate": 3.977777777777778e-05, "loss": 4.1328, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.8226, "num_token_overlap": 11.6814, "num_token_query": 31.4163, "num_token_union": 65.1568, "num_word_context": 202.4902, "num_word_doc": 49.871, "num_word_query": 23.3337, "postclip_grad_norm": 1.0, "preclip_grad_norm": 696.4318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.27, "query_norm": 1.3565, "queue_k_norm": 1.3903, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4163, "sent_len_1": 66.8226, "sent_len_max_0": 127.4087, "sent_len_max_1": 188.2625, "stdk": 0.0461, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.1501, "doc_norm": 1.3894, "encoder_q-embeddings": 2203.6494, "encoder_q-layer.0": 1605.4136, "encoder_q-layer.1": 1668.6284, "encoder_q-layer.10": 364.5456, "encoder_q-layer.11": 920.2629, "encoder_q-layer.2": 2018.2935, "encoder_q-layer.3": 2011.4315, "encoder_q-layer.4": 2192.95, "encoder_q-layer.5": 2524.7021, "encoder_q-layer.6": 2564.3147, "encoder_q-layer.7": 2615.7712, "encoder_q-layer.8": 1796.4312, "encoder_q-layer.9": 606.8531, "epoch": 0.19, "inbatch_neg_score": 0.276, "inbatch_pos_score": 0.791, "learning_rate": 3.972222222222222e-05, "loss": 4.1501, "norm_diff": 0.0293, "norm_loss": 0.0, "num_token_doc": 66.9675, "num_token_overlap": 11.6651, "num_token_query": 31.3933, "num_token_union": 65.2871, "num_word_context": 202.2835, "num_word_doc": 50.0031, "num_word_query": 23.3071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2886.1982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2764, "query_norm": 1.3601, "queue_k_norm": 1.3892, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3933, "sent_len_1": 66.9675, "sent_len_max_0": 127.4488, "sent_len_max_1": 187.4988, "stdk": 0.0462, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1674, "doc_norm": 1.387, "encoder_q-embeddings": 1887.7627, "encoder_q-layer.0": 1306.9825, "encoder_q-layer.1": 1456.3541, "encoder_q-layer.10": 370.294, "encoder_q-layer.11": 964.489, "encoder_q-layer.2": 1713.8931, "encoder_q-layer.3": 1607.5168, "encoder_q-layer.4": 1587.0989, "encoder_q-layer.5": 1094.9186, "encoder_q-layer.6": 964.7616, "encoder_q-layer.7": 775.1726, "encoder_q-layer.8": 569.3221, "encoder_q-layer.9": 317.4778, "epoch": 0.19, "inbatch_neg_score": 0.2635, "inbatch_pos_score": 0.7661, "learning_rate": 3.966666666666667e-05, "loss": 4.1674, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.8461, "num_token_overlap": 11.6391, "num_token_query": 31.3433, "num_token_union": 65.186, "num_word_context": 202.3665, "num_word_doc": 49.8542, "num_word_query": 23.2693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.8607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.3331, "queue_k_norm": 1.3866, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3433, "sent_len_1": 66.8461, "sent_len_max_0": 127.455, "sent_len_max_1": 190.8775, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.1684, "doc_norm": 1.3944, "encoder_q-embeddings": 876.0153, "encoder_q-layer.0": 604.7543, "encoder_q-layer.1": 644.8633, "encoder_q-layer.10": 328.3689, "encoder_q-layer.11": 885.1219, "encoder_q-layer.2": 666.0261, "encoder_q-layer.3": 643.2851, "encoder_q-layer.4": 600.3842, "encoder_q-layer.5": 495.6147, "encoder_q-layer.6": 494.484, "encoder_q-layer.7": 462.9651, "encoder_q-layer.8": 410.5614, "encoder_q-layer.9": 317.1711, "epoch": 0.19, "inbatch_neg_score": 0.247, "inbatch_pos_score": 0.7842, "learning_rate": 3.961111111111111e-05, "loss": 4.1684, "norm_diff": 0.0471, "norm_loss": 0.0, "num_token_doc": 66.6103, "num_token_overlap": 11.6312, "num_token_query": 31.1839, "num_token_union": 64.93, "num_word_context": 202.2368, "num_word_doc": 49.6962, "num_word_query": 23.1355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 937.6876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2485, "query_norm": 1.3472, "queue_k_norm": 1.387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1839, "sent_len_1": 66.6103, "sent_len_max_0": 127.4488, "sent_len_max_1": 188.8738, "stdk": 0.0465, "stdq": 0.0428, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.1319, "doc_norm": 1.386, "encoder_q-embeddings": 8492.7393, "encoder_q-layer.0": 6896.376, "encoder_q-layer.1": 6910.6172, "encoder_q-layer.10": 429.0525, "encoder_q-layer.11": 1095.432, "encoder_q-layer.2": 7576.1035, "encoder_q-layer.3": 7585.9951, "encoder_q-layer.4": 6841.7197, "encoder_q-layer.5": 5964.0259, "encoder_q-layer.6": 6065.1763, "encoder_q-layer.7": 7050.1919, "encoder_q-layer.8": 3252.0771, "encoder_q-layer.9": 735.8636, "epoch": 0.19, "inbatch_neg_score": 0.2565, "inbatch_pos_score": 0.7812, "learning_rate": 3.9555555555555556e-05, "loss": 4.1319, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.8711, "num_token_overlap": 11.6591, "num_token_query": 31.3064, "num_token_union": 65.1409, "num_word_context": 202.6661, "num_word_doc": 49.9207, "num_word_query": 23.25, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9149.1651, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2563, "query_norm": 1.3794, "queue_k_norm": 1.386, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3064, "sent_len_1": 66.8711, "sent_len_max_0": 127.4712, "sent_len_max_1": 187.6525, "stdk": 0.0462, "stdq": 0.0437, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.1649, "doc_norm": 1.385, "encoder_q-embeddings": 838.767, "encoder_q-layer.0": 612.7246, "encoder_q-layer.1": 666.0446, "encoder_q-layer.10": 362.3845, "encoder_q-layer.11": 940.3862, "encoder_q-layer.2": 705.3461, "encoder_q-layer.3": 714.9802, "encoder_q-layer.4": 696.588, "encoder_q-layer.5": 734.0386, "encoder_q-layer.6": 669.3669, "encoder_q-layer.7": 600.3476, "encoder_q-layer.8": 431.8342, "encoder_q-layer.9": 325.6343, "epoch": 0.19, "inbatch_neg_score": 0.2543, "inbatch_pos_score": 0.7588, "learning_rate": 3.9500000000000005e-05, "loss": 4.1649, "norm_diff": 0.0485, "norm_loss": 0.0, "num_token_doc": 66.8428, "num_token_overlap": 11.6943, "num_token_query": 31.3509, "num_token_union": 65.0758, "num_word_context": 202.4612, "num_word_doc": 49.8719, "num_word_query": 23.2695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1013.1267, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.252, "query_norm": 1.3366, "queue_k_norm": 1.3846, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3509, "sent_len_1": 66.8428, "sent_len_max_0": 127.3388, "sent_len_max_1": 189.7012, "stdk": 0.0462, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1402, "doc_norm": 1.39, "encoder_q-embeddings": 1477.3594, "encoder_q-layer.0": 1138.6698, "encoder_q-layer.1": 1157.8135, "encoder_q-layer.10": 361.321, "encoder_q-layer.11": 902.191, "encoder_q-layer.2": 1078.0028, "encoder_q-layer.3": 1066.7604, "encoder_q-layer.4": 947.2614, "encoder_q-layer.5": 632.3271, "encoder_q-layer.6": 531.4703, "encoder_q-layer.7": 430.1802, "encoder_q-layer.8": 370.9211, "encoder_q-layer.9": 302.3617, "epoch": 0.19, "inbatch_neg_score": 0.2576, "inbatch_pos_score": 0.7964, "learning_rate": 3.944444444444445e-05, "loss": 4.1402, "norm_diff": 0.0329, "norm_loss": 0.0, "num_token_doc": 66.6743, "num_token_overlap": 11.7045, "num_token_query": 31.4436, "num_token_union": 65.0622, "num_word_context": 202.5046, "num_word_doc": 49.7851, "num_word_query": 23.3597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1390.8204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2578, "query_norm": 1.3571, "queue_k_norm": 1.3815, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4436, "sent_len_1": 66.6743, "sent_len_max_0": 127.4338, "sent_len_max_1": 188.825, "stdk": 0.0465, "stdq": 0.0431, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.131, "doc_norm": 1.3801, "encoder_q-embeddings": 9146.2012, "encoder_q-layer.0": 6229.98, "encoder_q-layer.1": 5140.9883, "encoder_q-layer.10": 347.1017, "encoder_q-layer.11": 980.4635, "encoder_q-layer.2": 5443.5952, "encoder_q-layer.3": 4665.7778, "encoder_q-layer.4": 5174.8359, "encoder_q-layer.5": 3947.9512, "encoder_q-layer.6": 3180.8889, "encoder_q-layer.7": 3847.6001, "encoder_q-layer.8": 2214.4211, "encoder_q-layer.9": 489.1528, "epoch": 0.19, "inbatch_neg_score": 0.2668, "inbatch_pos_score": 0.7905, "learning_rate": 3.938888888888889e-05, "loss": 4.131, "norm_diff": 0.0388, "norm_loss": 0.0, "num_token_doc": 66.8211, "num_token_overlap": 11.6904, "num_token_query": 31.3229, "num_token_union": 65.0971, "num_word_context": 202.3747, "num_word_doc": 49.8204, "num_word_query": 23.2603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7471.7611, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2671, "query_norm": 1.3414, "queue_k_norm": 1.3823, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3229, "sent_len_1": 66.8211, "sent_len_max_0": 127.4537, "sent_len_max_1": 191.45, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.1671, "doc_norm": 1.3873, "encoder_q-embeddings": 6211.3379, "encoder_q-layer.0": 4604.4043, "encoder_q-layer.1": 4475.4678, "encoder_q-layer.10": 358.7778, "encoder_q-layer.11": 992.3606, "encoder_q-layer.2": 4476.3345, "encoder_q-layer.3": 4763.2324, "encoder_q-layer.4": 4573.125, "encoder_q-layer.5": 4206.7056, "encoder_q-layer.6": 3503.8247, "encoder_q-layer.7": 4336.6011, "encoder_q-layer.8": 2528.8931, "encoder_q-layer.9": 565.9685, "epoch": 0.19, "inbatch_neg_score": 0.2775, "inbatch_pos_score": 0.8164, "learning_rate": 3.933333333333333e-05, "loss": 4.1671, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.6203, "num_token_overlap": 11.6435, "num_token_query": 31.3101, "num_token_union": 65.0253, "num_word_context": 202.2513, "num_word_doc": 49.7508, "num_word_query": 23.248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6201.2821, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2776, "query_norm": 1.3685, "queue_k_norm": 1.3809, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3101, "sent_len_1": 66.6203, "sent_len_max_0": 127.6363, "sent_len_max_1": 189.875, "stdk": 0.0464, "stdq": 0.0426, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.1318, "doc_norm": 1.3843, "encoder_q-embeddings": 2039.8932, "encoder_q-layer.0": 1386.9808, "encoder_q-layer.1": 1612.9941, "encoder_q-layer.10": 381.5011, "encoder_q-layer.11": 987.1123, "encoder_q-layer.2": 1717.2113, "encoder_q-layer.3": 1646.8763, "encoder_q-layer.4": 1679.3361, "encoder_q-layer.5": 1519.1366, "encoder_q-layer.6": 1310.879, "encoder_q-layer.7": 938.9351, "encoder_q-layer.8": 675.0369, "encoder_q-layer.9": 359.1542, "epoch": 0.19, "inbatch_neg_score": 0.263, "inbatch_pos_score": 0.7769, "learning_rate": 3.927777777777778e-05, "loss": 4.1318, "norm_diff": 0.0625, "norm_loss": 0.0, "num_token_doc": 66.8112, "num_token_overlap": 11.7308, "num_token_query": 31.5347, "num_token_union": 65.1856, "num_word_context": 202.2509, "num_word_doc": 49.8264, "num_word_query": 23.4335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2074.0698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2627, "query_norm": 1.3218, "queue_k_norm": 1.3822, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5347, "sent_len_1": 66.8112, "sent_len_max_0": 127.4537, "sent_len_max_1": 190.1575, "stdk": 0.0463, "stdq": 0.0416, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.1298, "doc_norm": 1.3829, "encoder_q-embeddings": 2801.8396, "encoder_q-layer.0": 2015.8625, "encoder_q-layer.1": 2287.3179, "encoder_q-layer.10": 332.3127, "encoder_q-layer.11": 954.4974, "encoder_q-layer.2": 2556.8201, "encoder_q-layer.3": 1981.1376, "encoder_q-layer.4": 1558.3934, "encoder_q-layer.5": 1201.2063, "encoder_q-layer.6": 1050.9114, "encoder_q-layer.7": 972.078, "encoder_q-layer.8": 637.948, "encoder_q-layer.9": 356.8709, "epoch": 0.19, "inbatch_neg_score": 0.2639, "inbatch_pos_score": 0.7871, "learning_rate": 3.922222222222223e-05, "loss": 4.1298, "norm_diff": 0.0406, "norm_loss": 0.0, "num_token_doc": 67.1778, "num_token_overlap": 11.7638, "num_token_query": 31.5086, "num_token_union": 65.393, "num_word_context": 202.3997, "num_word_doc": 50.1051, "num_word_query": 23.4293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2604.2372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2632, "query_norm": 1.3423, "queue_k_norm": 1.3816, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5086, "sent_len_1": 67.1778, "sent_len_max_0": 127.7062, "sent_len_max_1": 190.1662, "stdk": 0.0463, "stdq": 0.0423, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1395, "doc_norm": 1.3786, "encoder_q-embeddings": 1908.3723, "encoder_q-layer.0": 1375.9573, "encoder_q-layer.1": 1705.1123, "encoder_q-layer.10": 363.2338, "encoder_q-layer.11": 970.4048, "encoder_q-layer.2": 2109.5557, "encoder_q-layer.3": 2247.0833, "encoder_q-layer.4": 1985.7275, "encoder_q-layer.5": 1741.6412, "encoder_q-layer.6": 1768.8066, "encoder_q-layer.7": 1333.1686, "encoder_q-layer.8": 1235.9542, "encoder_q-layer.9": 588.8475, "epoch": 0.19, "inbatch_neg_score": 0.2631, "inbatch_pos_score": 0.7988, "learning_rate": 3.9166666666666665e-05, "loss": 4.1395, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 67.1288, "num_token_overlap": 11.6476, "num_token_query": 31.317, "num_token_union": 65.2859, "num_word_context": 202.5577, "num_word_doc": 50.0562, "num_word_query": 23.2592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2390.2431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2625, "query_norm": 1.3389, "queue_k_norm": 1.3829, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.317, "sent_len_1": 67.1288, "sent_len_max_0": 127.4975, "sent_len_max_1": 191.3075, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.1469, "doc_norm": 1.3803, "encoder_q-embeddings": 2469.3628, "encoder_q-layer.0": 1892.3562, "encoder_q-layer.1": 1901.3574, "encoder_q-layer.10": 379.9078, "encoder_q-layer.11": 981.5968, "encoder_q-layer.2": 2270.6335, "encoder_q-layer.3": 1988.7272, "encoder_q-layer.4": 1638.2937, "encoder_q-layer.5": 1145.7228, "encoder_q-layer.6": 943.1134, "encoder_q-layer.7": 729.7283, "encoder_q-layer.8": 578.2144, "encoder_q-layer.9": 368.3563, "epoch": 0.19, "inbatch_neg_score": 0.2562, "inbatch_pos_score": 0.7471, "learning_rate": 3.9111111111111115e-05, "loss": 4.1469, "norm_diff": 0.047, "norm_loss": 0.0, "num_token_doc": 66.9154, "num_token_overlap": 11.6709, "num_token_query": 31.357, "num_token_union": 65.2223, "num_word_context": 202.492, "num_word_doc": 49.8958, "num_word_query": 23.3021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2402.2337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2549, "query_norm": 1.3332, "queue_k_norm": 1.3805, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.357, "sent_len_1": 66.9154, "sent_len_max_0": 127.415, "sent_len_max_1": 189.4087, "stdk": 0.0462, "stdq": 0.0421, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.1149, "doc_norm": 1.3783, "encoder_q-embeddings": 1982.8604, "encoder_q-layer.0": 1421.8354, "encoder_q-layer.1": 1619.7869, "encoder_q-layer.10": 341.0427, "encoder_q-layer.11": 909.4309, "encoder_q-layer.2": 1656.0192, "encoder_q-layer.3": 1656.9645, "encoder_q-layer.4": 1571.451, "encoder_q-layer.5": 1345.7278, "encoder_q-layer.6": 1141.6869, "encoder_q-layer.7": 832.1529, "encoder_q-layer.8": 523.1849, "encoder_q-layer.9": 315.5705, "epoch": 0.19, "inbatch_neg_score": 0.2582, "inbatch_pos_score": 0.792, "learning_rate": 3.905555555555556e-05, "loss": 4.1149, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.9568, "num_token_overlap": 11.7445, "num_token_query": 31.5098, "num_token_union": 65.2399, "num_word_context": 202.6253, "num_word_doc": 49.9696, "num_word_query": 23.4167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2009.534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2578, "query_norm": 1.3606, "queue_k_norm": 1.382, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5098, "sent_len_1": 66.9568, "sent_len_max_0": 127.6125, "sent_len_max_1": 190.8638, "stdk": 0.0461, "stdq": 0.0429, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.159, "doc_norm": 1.3795, "encoder_q-embeddings": 2105.8823, "encoder_q-layer.0": 1616.8704, "encoder_q-layer.1": 1696.6984, "encoder_q-layer.10": 333.6494, "encoder_q-layer.11": 972.3795, "encoder_q-layer.2": 1166.0076, "encoder_q-layer.3": 1101.6293, "encoder_q-layer.4": 1176.3479, "encoder_q-layer.5": 1036.2372, "encoder_q-layer.6": 1041.6136, "encoder_q-layer.7": 847.4548, "encoder_q-layer.8": 551.3036, "encoder_q-layer.9": 311.6892, "epoch": 0.19, "inbatch_neg_score": 0.2652, "inbatch_pos_score": 0.7725, "learning_rate": 3.9000000000000006e-05, "loss": 4.159, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.7157, "num_token_overlap": 11.6317, "num_token_query": 31.333, "num_token_union": 65.0955, "num_word_context": 202.0317, "num_word_doc": 49.7706, "num_word_query": 23.2789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1860.256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2656, "query_norm": 1.3289, "queue_k_norm": 1.3824, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.333, "sent_len_1": 66.7157, "sent_len_max_0": 127.5325, "sent_len_max_1": 189.5625, "stdk": 0.0462, "stdq": 0.0413, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 4.1272, "doc_norm": 1.3804, "encoder_q-embeddings": 791.3012, "encoder_q-layer.0": 602.6003, "encoder_q-layer.1": 665.7232, "encoder_q-layer.10": 345.3411, "encoder_q-layer.11": 964.0334, "encoder_q-layer.2": 671.147, "encoder_q-layer.3": 672.4222, "encoder_q-layer.4": 661.2504, "encoder_q-layer.5": 496.5811, "encoder_q-layer.6": 444.651, "encoder_q-layer.7": 399.0209, "encoder_q-layer.8": 379.2117, "encoder_q-layer.9": 313.0737, "epoch": 0.19, "inbatch_neg_score": 0.266, "inbatch_pos_score": 0.8242, "learning_rate": 3.894444444444444e-05, "loss": 4.1272, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.868, "num_token_overlap": 11.6532, "num_token_query": 31.2681, "num_token_union": 65.1308, "num_word_context": 202.4212, "num_word_doc": 49.9368, "num_word_query": 23.2186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 925.8024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2671, "query_norm": 1.3652, "queue_k_norm": 1.3825, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2681, "sent_len_1": 66.868, "sent_len_max_0": 127.1637, "sent_len_max_1": 187.7212, "stdk": 0.0462, "stdq": 0.0429, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1333, "doc_norm": 1.3762, "encoder_q-embeddings": 564.5112, "encoder_q-layer.0": 384.885, "encoder_q-layer.1": 406.6191, "encoder_q-layer.10": 364.2029, "encoder_q-layer.11": 1018.3831, "encoder_q-layer.2": 441.9233, "encoder_q-layer.3": 444.756, "encoder_q-layer.4": 429.3769, "encoder_q-layer.5": 429.5414, "encoder_q-layer.6": 404.6449, "encoder_q-layer.7": 402.1483, "encoder_q-layer.8": 392.8898, "encoder_q-layer.9": 350.8305, "epoch": 0.2, "inbatch_neg_score": 0.2774, "inbatch_pos_score": 0.7964, "learning_rate": 3.888888888888889e-05, "loss": 4.1333, "norm_diff": 0.0056, "norm_loss": 0.0, "num_token_doc": 66.9511, "num_token_overlap": 11.6375, "num_token_query": 31.2376, "num_token_union": 65.1981, "num_word_context": 202.4554, "num_word_doc": 49.976, "num_word_query": 23.2017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 776.2486, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2759, "query_norm": 1.3741, "queue_k_norm": 1.3814, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2376, "sent_len_1": 66.9511, "sent_len_max_0": 127.3037, "sent_len_max_1": 188.9837, "stdk": 0.046, "stdq": 0.0431, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 28.5229, "dev_samples_per_second": 2.244, "dev_steps_per_second": 0.035, "epoch": 0.2, "step": 30000, "test_accuracy": 91.61376953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.47593608498573303, "test_doc_norm": 1.3277735710144043, "test_inbatch_neg_score": 0.5826038122177124, "test_inbatch_pos_score": 1.4080125093460083, "test_loss": 0.47593608498573303, "test_loss_align": 1.1323888301849365, "test_loss_unif": 3.84432315826416, "test_loss_unif_q@queue": 3.844322919845581, "test_norm_diff": 0.13988399505615234, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.26438891887664795, "test_query_norm": 1.4676575660705566, "test_queue_k_norm": 1.3814136981964111, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03930444270372391, "test_stdq": 0.041538264602422714, "test_stdqueue_k": 0.04633412882685661, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.5229, "dev_samples_per_second": 2.244, "dev_steps_per_second": 0.035, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.29273, "eval_beir-arguana_recall@10": 0.50853, "eval_beir-arguana_recall@100": 0.82432, "eval_beir-arguana_recall@20": 0.63371, "eval_beir-avg_ndcg@10": 0.3342128333333333, "eval_beir-avg_recall@10": 0.3975870833333333, "eval_beir-avg_recall@100": 0.5848705833333334, "eval_beir-avg_recall@20": 0.45569766666666667, "eval_beir-cqadupstack_ndcg@10": 0.21779833333333332, "eval_beir-cqadupstack_recall@10": 0.3020308333333333, "eval_beir-cqadupstack_recall@100": 0.5265058333333333, "eval_beir-cqadupstack_recall@20": 0.36416666666666675, "eval_beir-fiqa_ndcg@10": 0.20048, "eval_beir-fiqa_recall@10": 0.26808, "eval_beir-fiqa_recall@100": 0.5194, "eval_beir-fiqa_recall@20": 0.33145, "eval_beir-nfcorpus_ndcg@10": 0.27591, "eval_beir-nfcorpus_recall@10": 0.14048, "eval_beir-nfcorpus_recall@100": 0.25711, "eval_beir-nfcorpus_recall@20": 0.16825, "eval_beir-nq_ndcg@10": 0.22164, "eval_beir-nq_recall@10": 0.37283, "eval_beir-nq_recall@100": 0.71181, "eval_beir-nq_recall@20": 0.47842, "eval_beir-quora_ndcg@10": 0.71773, "eval_beir-quora_recall@10": 0.84152, "eval_beir-quora_recall@100": 0.9638, "eval_beir-quora_recall@20": 0.89536, "eval_beir-scidocs_ndcg@10": 0.13365, "eval_beir-scidocs_recall@10": 0.14287, "eval_beir-scidocs_recall@100": 0.3311, "eval_beir-scidocs_recall@20": 0.19052, "eval_beir-scifact_ndcg@10": 0.57213, "eval_beir-scifact_recall@10": 0.72794, "eval_beir-scifact_recall@100": 0.90489, "eval_beir-scifact_recall@20": 0.80689, "eval_beir-trec-covid_ndcg@10": 0.4963, "eval_beir-trec-covid_recall@10": 0.528, "eval_beir-trec-covid_recall@100": 0.3776, "eval_beir-trec-covid_recall@20": 0.492, "eval_beir-webis-touche2020_ndcg@10": 0.21376, "eval_beir-webis-touche2020_recall@10": 0.14359, "eval_beir-webis-touche2020_recall@100": 0.43217, "eval_beir-webis-touche2020_recall@20": 0.19621, "eval_senteval-avg_sts": 0.7423311912355756, "eval_senteval-sickr_spearman": 0.702532188605713, "eval_senteval-stsb_spearman": 0.7821301938654381, "step": 30000, "test_accuracy": 91.61376953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.47593608498573303, "test_doc_norm": 1.3277735710144043, "test_inbatch_neg_score": 0.5826038122177124, "test_inbatch_pos_score": 1.4080125093460083, "test_loss": 0.47593608498573303, "test_loss_align": 1.1323888301849365, "test_loss_unif": 3.84432315826416, "test_loss_unif_q@queue": 3.844322919845581, "test_norm_diff": 0.13988399505615234, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.26438891887664795, "test_query_norm": 1.4676575660705566, "test_queue_k_norm": 1.3814136981964111, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03930444270372391, "test_stdq": 0.041538264602422714, "test_stdqueue_k": 0.04633412882685661, "test_stdqueue_q": 0.0 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.1335, "doc_norm": 1.3865, "encoder_q-embeddings": 1616.754, "encoder_q-layer.0": 1167.0494, "encoder_q-layer.1": 1248.2042, "encoder_q-layer.10": 453.5858, "encoder_q-layer.11": 1119.1761, "encoder_q-layer.2": 1447.2966, "encoder_q-layer.3": 1788.2839, "encoder_q-layer.4": 2031.0046, "encoder_q-layer.5": 1765.3053, "encoder_q-layer.6": 1465.5239, "encoder_q-layer.7": 1535.0656, "encoder_q-layer.8": 917.4036, "encoder_q-layer.9": 431.8478, "epoch": 0.2, "inbatch_neg_score": 0.2763, "inbatch_pos_score": 0.8047, "learning_rate": 3.883333333333333e-05, "loss": 4.1335, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.7174, "num_token_overlap": 11.646, "num_token_query": 31.2215, "num_token_union": 65.0514, "num_word_context": 202.3693, "num_word_doc": 49.7966, "num_word_query": 23.1914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2089.0898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.4001, "queue_k_norm": 1.381, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2215, "sent_len_1": 66.7174, "sent_len_max_0": 127.4188, "sent_len_max_1": 188.065, "stdk": 0.0464, "stdq": 0.0441, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1157, "doc_norm": 1.376, "encoder_q-embeddings": 7398.6567, "encoder_q-layer.0": 5630.709, "encoder_q-layer.1": 5687.3394, "encoder_q-layer.10": 686.9046, "encoder_q-layer.11": 1918.5276, "encoder_q-layer.2": 5481.5664, "encoder_q-layer.3": 4846.4385, "encoder_q-layer.4": 3601.4485, "encoder_q-layer.5": 3104.1494, "encoder_q-layer.6": 3035.6218, "encoder_q-layer.7": 2775.8684, "encoder_q-layer.8": 1713.4838, "encoder_q-layer.9": 1024.293, "epoch": 0.2, "inbatch_neg_score": 0.2741, "inbatch_pos_score": 0.7944, "learning_rate": 3.877777777777778e-05, "loss": 4.1157, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.9496, "num_token_overlap": 11.7078, "num_token_query": 31.3515, "num_token_union": 65.1376, "num_word_context": 202.2945, "num_word_doc": 49.9312, "num_word_query": 23.2831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6601.5351, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2739, "query_norm": 1.3359, "queue_k_norm": 1.3811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3515, "sent_len_1": 66.9496, "sent_len_max_0": 127.2588, "sent_len_max_1": 190.68, "stdk": 0.046, "stdq": 0.0416, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.1299, "doc_norm": 1.3856, "encoder_q-embeddings": 1270.0566, "encoder_q-layer.0": 993.3732, "encoder_q-layer.1": 901.8093, "encoder_q-layer.10": 385.6281, "encoder_q-layer.11": 1012.5636, "encoder_q-layer.2": 915.7361, "encoder_q-layer.3": 909.6533, "encoder_q-layer.4": 949.4545, "encoder_q-layer.5": 870.4939, "encoder_q-layer.6": 830.5358, "encoder_q-layer.7": 829.7441, "encoder_q-layer.8": 575.7458, "encoder_q-layer.9": 355.6037, "epoch": 0.2, "inbatch_neg_score": 0.2731, "inbatch_pos_score": 0.793, "learning_rate": 3.8722222222222225e-05, "loss": 4.1299, "norm_diff": 0.024, "norm_loss": 0.0, "num_token_doc": 66.5948, "num_token_overlap": 11.6311, "num_token_query": 31.2095, "num_token_union": 64.9307, "num_word_context": 201.777, "num_word_doc": 49.6822, "num_word_query": 23.1718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1356.5382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2732, "query_norm": 1.3616, "queue_k_norm": 1.3819, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2095, "sent_len_1": 66.5948, "sent_len_max_0": 127.4062, "sent_len_max_1": 189.0263, "stdk": 0.0463, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.1268, "doc_norm": 1.3901, "encoder_q-embeddings": 596.2724, "encoder_q-layer.0": 442.34, "encoder_q-layer.1": 473.213, "encoder_q-layer.10": 360.1994, "encoder_q-layer.11": 953.4673, "encoder_q-layer.2": 536.0628, "encoder_q-layer.3": 543.1691, "encoder_q-layer.4": 539.356, "encoder_q-layer.5": 507.9272, "encoder_q-layer.6": 559.9794, "encoder_q-layer.7": 516.2946, "encoder_q-layer.8": 443.1778, "encoder_q-layer.9": 359.1134, "epoch": 0.2, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 0.8135, "learning_rate": 3.866666666666667e-05, "loss": 4.1268, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.7046, "num_token_overlap": 11.6306, "num_token_query": 31.3766, "num_token_union": 65.1319, "num_word_context": 202.2437, "num_word_doc": 49.7489, "num_word_query": 23.3084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 839.3314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2766, "query_norm": 1.3693, "queue_k_norm": 1.3825, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3766, "sent_len_1": 66.7046, "sent_len_max_0": 127.4725, "sent_len_max_1": 190.325, "stdk": 0.0465, "stdq": 0.043, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.1427, "doc_norm": 1.3828, "encoder_q-embeddings": 3918.2566, "encoder_q-layer.0": 2622.4285, "encoder_q-layer.1": 2693.0796, "encoder_q-layer.10": 401.4359, "encoder_q-layer.11": 1011.8625, "encoder_q-layer.2": 3330.387, "encoder_q-layer.3": 3481.0696, "encoder_q-layer.4": 3049.0088, "encoder_q-layer.5": 3081.168, "encoder_q-layer.6": 2524.0825, "encoder_q-layer.7": 1836.6296, "encoder_q-layer.8": 1204.3431, "encoder_q-layer.9": 453.67, "epoch": 0.2, "inbatch_neg_score": 0.2725, "inbatch_pos_score": 0.7896, "learning_rate": 3.8611111111111116e-05, "loss": 4.1427, "norm_diff": 0.0288, "norm_loss": 0.0, "num_token_doc": 66.9066, "num_token_overlap": 11.6308, "num_token_query": 31.233, "num_token_union": 65.0868, "num_word_context": 202.396, "num_word_doc": 49.8924, "num_word_query": 23.1845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3959.192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2715, "query_norm": 1.3541, "queue_k_norm": 1.3834, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.233, "sent_len_1": 66.9066, "sent_len_max_0": 127.5425, "sent_len_max_1": 190.2475, "stdk": 0.0462, "stdq": 0.0426, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.1449, "doc_norm": 1.3835, "encoder_q-embeddings": 1471.5391, "encoder_q-layer.0": 986.5989, "encoder_q-layer.1": 1075.6709, "encoder_q-layer.10": 384.8863, "encoder_q-layer.11": 953.0389, "encoder_q-layer.2": 1194.9028, "encoder_q-layer.3": 1341.5452, "encoder_q-layer.4": 1210.8583, "encoder_q-layer.5": 1032.2336, "encoder_q-layer.6": 885.4963, "encoder_q-layer.7": 702.4738, "encoder_q-layer.8": 600.3744, "encoder_q-layer.9": 403.9936, "epoch": 0.2, "inbatch_neg_score": 0.2696, "inbatch_pos_score": 0.8057, "learning_rate": 3.855555555555556e-05, "loss": 4.1449, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.6542, "num_token_overlap": 11.6129, "num_token_query": 31.2554, "num_token_union": 65.009, "num_word_context": 202.3806, "num_word_doc": 49.7563, "num_word_query": 23.2074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1527.5931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2681, "query_norm": 1.3692, "queue_k_norm": 1.3828, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2554, "sent_len_1": 66.6542, "sent_len_max_0": 127.4475, "sent_len_max_1": 188.9187, "stdk": 0.0462, "stdq": 0.0434, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.1314, "doc_norm": 1.3871, "encoder_q-embeddings": 667.5109, "encoder_q-layer.0": 487.6125, "encoder_q-layer.1": 521.132, "encoder_q-layer.10": 357.3423, "encoder_q-layer.11": 942.3952, "encoder_q-layer.2": 590.4515, "encoder_q-layer.3": 648.9149, "encoder_q-layer.4": 623.4149, "encoder_q-layer.5": 543.3213, "encoder_q-layer.6": 534.585, "encoder_q-layer.7": 520.6323, "encoder_q-layer.8": 453.3513, "encoder_q-layer.9": 339.761, "epoch": 0.2, "inbatch_neg_score": 0.2585, "inbatch_pos_score": 0.7827, "learning_rate": 3.85e-05, "loss": 4.1314, "norm_diff": 0.0542, "norm_loss": 0.0, "num_token_doc": 66.7584, "num_token_overlap": 11.7028, "num_token_query": 31.4008, "num_token_union": 65.1053, "num_word_context": 202.2123, "num_word_doc": 49.863, "num_word_query": 23.3226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 886.7792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.259, "query_norm": 1.3329, "queue_k_norm": 1.3828, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4008, "sent_len_1": 66.7584, "sent_len_max_0": 127.5275, "sent_len_max_1": 189.565, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.1084, "doc_norm": 1.3809, "encoder_q-embeddings": 1001.6577, "encoder_q-layer.0": 742.4617, "encoder_q-layer.1": 761.9987, "encoder_q-layer.10": 350.8324, "encoder_q-layer.11": 972.1655, "encoder_q-layer.2": 861.7181, "encoder_q-layer.3": 812.8576, "encoder_q-layer.4": 756.846, "encoder_q-layer.5": 636.8295, "encoder_q-layer.6": 640.6967, "encoder_q-layer.7": 546.4552, "encoder_q-layer.8": 431.347, "encoder_q-layer.9": 328.8362, "epoch": 0.2, "inbatch_neg_score": 0.2529, "inbatch_pos_score": 0.7729, "learning_rate": 3.844444444444444e-05, "loss": 4.1084, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 66.7821, "num_token_overlap": 11.7462, "num_token_query": 31.5398, "num_token_union": 65.1615, "num_word_context": 202.293, "num_word_doc": 49.8382, "num_word_query": 23.4407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1124.8433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2522, "query_norm": 1.3171, "queue_k_norm": 1.3835, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5398, "sent_len_1": 66.7821, "sent_len_max_0": 127.5875, "sent_len_max_1": 189.8063, "stdk": 0.0462, "stdq": 0.0415, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.1174, "doc_norm": 1.3856, "encoder_q-embeddings": 1367.9323, "encoder_q-layer.0": 1081.0226, "encoder_q-layer.1": 1048.6498, "encoder_q-layer.10": 389.9107, "encoder_q-layer.11": 1072.2561, "encoder_q-layer.2": 1032.847, "encoder_q-layer.3": 951.0962, "encoder_q-layer.4": 886.6753, "encoder_q-layer.5": 709.605, "encoder_q-layer.6": 804.9192, "encoder_q-layer.7": 715.9906, "encoder_q-layer.8": 554.706, "encoder_q-layer.9": 377.326, "epoch": 0.2, "inbatch_neg_score": 0.2654, "inbatch_pos_score": 0.7744, "learning_rate": 3.838888888888889e-05, "loss": 4.1174, "norm_diff": 0.0566, "norm_loss": 0.0, "num_token_doc": 66.9064, "num_token_overlap": 11.6936, "num_token_query": 31.5168, "num_token_union": 65.2998, "num_word_context": 202.6068, "num_word_doc": 49.9517, "num_word_query": 23.4365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1417.1249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.329, "queue_k_norm": 1.3828, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5168, "sent_len_1": 66.9064, "sent_len_max_0": 127.4525, "sent_len_max_1": 189.0238, "stdk": 0.0464, "stdq": 0.0414, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.1244, "doc_norm": 1.392, "encoder_q-embeddings": 6746.3467, "encoder_q-layer.0": 5523.2607, "encoder_q-layer.1": 6102.7666, "encoder_q-layer.10": 357.5951, "encoder_q-layer.11": 1029.4182, "encoder_q-layer.2": 6095.6514, "encoder_q-layer.3": 6192.6279, "encoder_q-layer.4": 6186.2827, "encoder_q-layer.5": 5588.4761, "encoder_q-layer.6": 5173.9424, "encoder_q-layer.7": 4016.343, "encoder_q-layer.8": 2716.1482, "encoder_q-layer.9": 622.2361, "epoch": 0.2, "inbatch_neg_score": 0.2692, "inbatch_pos_score": 0.8066, "learning_rate": 3.8333333333333334e-05, "loss": 4.1244, "norm_diff": 0.0363, "norm_loss": 0.0, "num_token_doc": 66.8908, "num_token_overlap": 11.6701, "num_token_query": 31.3378, "num_token_union": 65.1533, "num_word_context": 202.4652, "num_word_doc": 49.9226, "num_word_query": 23.2786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7507.7829, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.269, "query_norm": 1.3557, "queue_k_norm": 1.3835, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3378, "sent_len_1": 66.8908, "sent_len_max_0": 127.46, "sent_len_max_1": 188.8187, "stdk": 0.0466, "stdq": 0.0424, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.1152, "doc_norm": 1.3821, "encoder_q-embeddings": 707.4713, "encoder_q-layer.0": 505.8242, "encoder_q-layer.1": 560.752, "encoder_q-layer.10": 366.7886, "encoder_q-layer.11": 961.7623, "encoder_q-layer.2": 604.1333, "encoder_q-layer.3": 631.3364, "encoder_q-layer.4": 632.9852, "encoder_q-layer.5": 597.3018, "encoder_q-layer.6": 603.2499, "encoder_q-layer.7": 486.678, "encoder_q-layer.8": 367.321, "encoder_q-layer.9": 303.4538, "epoch": 0.2, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.7812, "learning_rate": 3.827777777777778e-05, "loss": 4.1152, "norm_diff": 0.0565, "norm_loss": 0.0, "num_token_doc": 66.8412, "num_token_overlap": 11.7112, "num_token_query": 31.4615, "num_token_union": 65.1824, "num_word_context": 202.113, "num_word_doc": 49.8634, "num_word_query": 23.359, "postclip_grad_norm": 1.0, "preclip_grad_norm": 907.0348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2563, "query_norm": 1.3256, "queue_k_norm": 1.383, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4615, "sent_len_1": 66.8412, "sent_len_max_0": 127.4425, "sent_len_max_1": 190.7887, "stdk": 0.0462, "stdq": 0.0418, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.115, "doc_norm": 1.3779, "encoder_q-embeddings": 634.175, "encoder_q-layer.0": 433.841, "encoder_q-layer.1": 507.0819, "encoder_q-layer.10": 407.9597, "encoder_q-layer.11": 1052.2141, "encoder_q-layer.2": 637.4971, "encoder_q-layer.3": 653.2, "encoder_q-layer.4": 577.2435, "encoder_q-layer.5": 474.9653, "encoder_q-layer.6": 441.002, "encoder_q-layer.7": 380.3176, "encoder_q-layer.8": 423.7273, "encoder_q-layer.9": 358.158, "epoch": 0.2, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.7622, "learning_rate": 3.8222222222222226e-05, "loss": 4.115, "norm_diff": 0.0599, "norm_loss": 0.0, "num_token_doc": 66.9486, "num_token_overlap": 11.7345, "num_token_query": 31.5679, "num_token_union": 65.2962, "num_word_context": 202.3732, "num_word_doc": 49.9807, "num_word_query": 23.4721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 859.1468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.318, "queue_k_norm": 1.3852, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5679, "sent_len_1": 66.9486, "sent_len_max_0": 127.4237, "sent_len_max_1": 189.0387, "stdk": 0.0461, "stdq": 0.0414, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 39.0625, "active_queue_size": 16384.0, "cl_loss": 4.1165, "doc_norm": 1.3763, "encoder_q-embeddings": 976.0298, "encoder_q-layer.0": 675.7849, "encoder_q-layer.1": 738.3496, "encoder_q-layer.10": 350.4481, "encoder_q-layer.11": 1029.8499, "encoder_q-layer.2": 838.0781, "encoder_q-layer.3": 976.7582, "encoder_q-layer.4": 873.8148, "encoder_q-layer.5": 719.4306, "encoder_q-layer.6": 623.9778, "encoder_q-layer.7": 496.5159, "encoder_q-layer.8": 398.2442, "encoder_q-layer.9": 318.4012, "epoch": 0.2, "inbatch_neg_score": 0.253, "inbatch_pos_score": 0.7554, "learning_rate": 3.816666666666667e-05, "loss": 4.1165, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.8398, "num_token_overlap": 11.6417, "num_token_query": 31.239, "num_token_union": 65.0904, "num_word_context": 202.3144, "num_word_doc": 49.8807, "num_word_query": 23.1741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1135.7281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.3411, "queue_k_norm": 1.383, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.239, "sent_len_1": 66.8398, "sent_len_max_0": 127.2237, "sent_len_max_1": 190.2363, "stdk": 0.046, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.1205, "doc_norm": 1.3817, "encoder_q-embeddings": 877.0992, "encoder_q-layer.0": 608.0387, "encoder_q-layer.1": 668.8583, "encoder_q-layer.10": 408.7061, "encoder_q-layer.11": 1085.1971, "encoder_q-layer.2": 741.5899, "encoder_q-layer.3": 733.9916, "encoder_q-layer.4": 702.1453, "encoder_q-layer.5": 622.2269, "encoder_q-layer.6": 616.1492, "encoder_q-layer.7": 593.2297, "encoder_q-layer.8": 518.0955, "encoder_q-layer.9": 375.9543, "epoch": 0.2, "inbatch_neg_score": 0.2643, "inbatch_pos_score": 0.7935, "learning_rate": 3.811111111111112e-05, "loss": 4.1205, "norm_diff": 0.0348, "norm_loss": 0.0, "num_token_doc": 66.6639, "num_token_overlap": 11.7041, "num_token_query": 31.3278, "num_token_union": 65.0147, "num_word_context": 202.2589, "num_word_doc": 49.7744, "num_word_query": 23.2696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1041.3303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2629, "query_norm": 1.3469, "queue_k_norm": 1.3826, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3278, "sent_len_1": 66.6639, "sent_len_max_0": 127.4488, "sent_len_max_1": 188.6813, "stdk": 0.0462, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1098, "doc_norm": 1.3777, "encoder_q-embeddings": 654.0236, "encoder_q-layer.0": 460.541, "encoder_q-layer.1": 490.2008, "encoder_q-layer.10": 351.9351, "encoder_q-layer.11": 956.8419, "encoder_q-layer.2": 560.0896, "encoder_q-layer.3": 607.1803, "encoder_q-layer.4": 605.9705, "encoder_q-layer.5": 563.4059, "encoder_q-layer.6": 562.586, "encoder_q-layer.7": 526.6415, "encoder_q-layer.8": 496.4007, "encoder_q-layer.9": 321.9683, "epoch": 0.21, "inbatch_neg_score": 0.2524, "inbatch_pos_score": 0.7593, "learning_rate": 3.805555555555555e-05, "loss": 4.1098, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 66.8583, "num_token_overlap": 11.6946, "num_token_query": 31.5132, "num_token_union": 65.2229, "num_word_context": 202.6218, "num_word_doc": 49.8961, "num_word_query": 23.392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 879.6107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.3406, "queue_k_norm": 1.3839, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5132, "sent_len_1": 66.8583, "sent_len_max_0": 127.47, "sent_len_max_1": 188.9162, "stdk": 0.0461, "stdq": 0.0423, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 4.1239, "doc_norm": 1.387, "encoder_q-embeddings": 623.0272, "encoder_q-layer.0": 423.1895, "encoder_q-layer.1": 468.2343, "encoder_q-layer.10": 368.104, "encoder_q-layer.11": 1020.1387, "encoder_q-layer.2": 520.6738, "encoder_q-layer.3": 561.2794, "encoder_q-layer.4": 541.0073, "encoder_q-layer.5": 473.4698, "encoder_q-layer.6": 428.0001, "encoder_q-layer.7": 396.4237, "encoder_q-layer.8": 431.7758, "encoder_q-layer.9": 356.9964, "epoch": 0.21, "inbatch_neg_score": 0.2429, "inbatch_pos_score": 0.7471, "learning_rate": 3.8e-05, "loss": 4.1239, "norm_diff": 0.106, "norm_loss": 0.0, "num_token_doc": 66.6805, "num_token_overlap": 11.6094, "num_token_query": 31.2305, "num_token_union": 65.0693, "num_word_context": 202.3972, "num_word_doc": 49.8234, "num_word_query": 23.1861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 827.2348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2432, "query_norm": 1.281, "queue_k_norm": 1.3813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2305, "sent_len_1": 66.6805, "sent_len_max_0": 127.4513, "sent_len_max_1": 187.495, "stdk": 0.0465, "stdq": 0.0404, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.1199, "doc_norm": 1.3822, "encoder_q-embeddings": 663.1813, "encoder_q-layer.0": 463.5103, "encoder_q-layer.1": 533.0443, "encoder_q-layer.10": 330.7321, "encoder_q-layer.11": 868.6927, "encoder_q-layer.2": 609.9901, "encoder_q-layer.3": 631.5333, "encoder_q-layer.4": 628.6125, "encoder_q-layer.5": 599.9783, "encoder_q-layer.6": 506.9548, "encoder_q-layer.7": 440.0614, "encoder_q-layer.8": 427.3423, "encoder_q-layer.9": 290.4176, "epoch": 0.21, "inbatch_neg_score": 0.2308, "inbatch_pos_score": 0.7754, "learning_rate": 3.7944444444444444e-05, "loss": 4.1199, "norm_diff": 0.0472, "norm_loss": 0.0, "num_token_doc": 66.5603, "num_token_overlap": 11.5959, "num_token_query": 31.2627, "num_token_union": 64.9195, "num_word_context": 202.1194, "num_word_doc": 49.6545, "num_word_query": 23.2291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 852.5863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2311, "query_norm": 1.3349, "queue_k_norm": 1.3804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2627, "sent_len_1": 66.5603, "sent_len_max_0": 127.6088, "sent_len_max_1": 188.475, "stdk": 0.0464, "stdq": 0.043, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.1278, "doc_norm": 1.3837, "encoder_q-embeddings": 2144.2876, "encoder_q-layer.0": 1513.0388, "encoder_q-layer.1": 1787.0641, "encoder_q-layer.10": 358.7401, "encoder_q-layer.11": 971.9859, "encoder_q-layer.2": 2202.2356, "encoder_q-layer.3": 2325.7373, "encoder_q-layer.4": 2034.7712, "encoder_q-layer.5": 1818.6084, "encoder_q-layer.6": 1704.5469, "encoder_q-layer.7": 1089.1605, "encoder_q-layer.8": 758.6951, "encoder_q-layer.9": 357.1906, "epoch": 0.21, "inbatch_neg_score": 0.227, "inbatch_pos_score": 0.7344, "learning_rate": 3.7888888888888894e-05, "loss": 4.1278, "norm_diff": 0.0734, "norm_loss": 0.0, "num_token_doc": 66.9296, "num_token_overlap": 11.6424, "num_token_query": 31.3133, "num_token_union": 65.1937, "num_word_context": 202.4396, "num_word_doc": 49.9121, "num_word_query": 23.2492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2481.6384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2264, "query_norm": 1.3103, "queue_k_norm": 1.3826, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3133, "sent_len_1": 66.9296, "sent_len_max_0": 127.5125, "sent_len_max_1": 191.5938, "stdk": 0.0464, "stdq": 0.0422, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1261, "doc_norm": 1.382, "encoder_q-embeddings": 730.4037, "encoder_q-layer.0": 514.4986, "encoder_q-layer.1": 574.2275, "encoder_q-layer.10": 364.0914, "encoder_q-layer.11": 954.9626, "encoder_q-layer.2": 672.4323, "encoder_q-layer.3": 728.9658, "encoder_q-layer.4": 743.9592, "encoder_q-layer.5": 775.3157, "encoder_q-layer.6": 635.1743, "encoder_q-layer.7": 482.3985, "encoder_q-layer.8": 406.4497, "encoder_q-layer.9": 314.2894, "epoch": 0.21, "inbatch_neg_score": 0.2219, "inbatch_pos_score": 0.7329, "learning_rate": 3.7833333333333336e-05, "loss": 4.1261, "norm_diff": 0.0806, "norm_loss": 0.0, "num_token_doc": 66.9537, "num_token_overlap": 11.6963, "num_token_query": 31.3596, "num_token_union": 65.1996, "num_word_context": 202.5223, "num_word_doc": 49.9522, "num_word_query": 23.3003, "postclip_grad_norm": 1.0, "preclip_grad_norm": 947.807, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2233, "query_norm": 1.3013, "queue_k_norm": 1.3797, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3596, "sent_len_1": 66.9537, "sent_len_max_0": 127.4062, "sent_len_max_1": 189.8775, "stdk": 0.0465, "stdq": 0.0418, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1146, "doc_norm": 1.3829, "encoder_q-embeddings": 3554.2891, "encoder_q-layer.0": 2452.2959, "encoder_q-layer.1": 2635.8381, "encoder_q-layer.10": 385.1143, "encoder_q-layer.11": 1044.9572, "encoder_q-layer.2": 3047.1917, "encoder_q-layer.3": 3291.2207, "encoder_q-layer.4": 3356.5024, "encoder_q-layer.5": 3755.9712, "encoder_q-layer.6": 3176.5181, "encoder_q-layer.7": 2424.1084, "encoder_q-layer.8": 1141.2527, "encoder_q-layer.9": 395.687, "epoch": 0.21, "inbatch_neg_score": 0.2281, "inbatch_pos_score": 0.7603, "learning_rate": 3.777777777777778e-05, "loss": 4.1146, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.7837, "num_token_overlap": 11.6999, "num_token_query": 31.3657, "num_token_union": 65.0964, "num_word_context": 202.3542, "num_word_doc": 49.8464, "num_word_query": 23.295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3906.275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2285, "query_norm": 1.3437, "queue_k_norm": 1.3781, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3657, "sent_len_1": 66.7837, "sent_len_max_0": 127.3575, "sent_len_max_1": 190.21, "stdk": 0.0465, "stdq": 0.0431, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.0881, "doc_norm": 1.3765, "encoder_q-embeddings": 1066.2557, "encoder_q-layer.0": 867.5141, "encoder_q-layer.1": 881.2632, "encoder_q-layer.10": 363.0638, "encoder_q-layer.11": 970.2379, "encoder_q-layer.2": 912.7974, "encoder_q-layer.3": 816.5272, "encoder_q-layer.4": 752.7207, "encoder_q-layer.5": 635.9157, "encoder_q-layer.6": 589.1334, "encoder_q-layer.7": 453.3347, "encoder_q-layer.8": 443.7244, "encoder_q-layer.9": 334.9987, "epoch": 0.21, "inbatch_neg_score": 0.2315, "inbatch_pos_score": 0.7451, "learning_rate": 3.772222222222223e-05, "loss": 4.0881, "norm_diff": 0.054, "norm_loss": 0.0, "num_token_doc": 66.8229, "num_token_overlap": 11.6928, "num_token_query": 31.4546, "num_token_union": 65.1318, "num_word_context": 202.1631, "num_word_doc": 49.8906, "num_word_query": 23.3603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1153.4718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2308, "query_norm": 1.3225, "queue_k_norm": 1.3772, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4546, "sent_len_1": 66.8229, "sent_len_max_0": 127.7013, "sent_len_max_1": 189.9075, "stdk": 0.0463, "stdq": 0.0422, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0975, "doc_norm": 1.3693, "encoder_q-embeddings": 1140.0107, "encoder_q-layer.0": 827.9933, "encoder_q-layer.1": 884.9623, "encoder_q-layer.10": 404.8754, "encoder_q-layer.11": 964.1718, "encoder_q-layer.2": 990.351, "encoder_q-layer.3": 1014.82, "encoder_q-layer.4": 1008.4895, "encoder_q-layer.5": 892.6185, "encoder_q-layer.6": 660.8433, "encoder_q-layer.7": 554.6221, "encoder_q-layer.8": 488.7697, "encoder_q-layer.9": 369.4335, "epoch": 0.21, "inbatch_neg_score": 0.2365, "inbatch_pos_score": 0.7695, "learning_rate": 3.766666666666667e-05, "loss": 4.0975, "norm_diff": 0.013, "norm_loss": 0.0, "num_token_doc": 67.1057, "num_token_overlap": 11.6674, "num_token_query": 31.223, "num_token_union": 65.1963, "num_word_context": 202.4973, "num_word_doc": 50.039, "num_word_query": 23.1687, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1240.6897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2349, "query_norm": 1.3575, "queue_k_norm": 1.3786, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.223, "sent_len_1": 67.1057, "sent_len_max_0": 127.3637, "sent_len_max_1": 189.74, "stdk": 0.046, "stdq": 0.0432, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.1233, "doc_norm": 1.3774, "encoder_q-embeddings": 2034.7041, "encoder_q-layer.0": 1436.6829, "encoder_q-layer.1": 1611.2942, "encoder_q-layer.10": 687.4628, "encoder_q-layer.11": 1759.8763, "encoder_q-layer.2": 1555.4287, "encoder_q-layer.3": 1330.1693, "encoder_q-layer.4": 1149.7319, "encoder_q-layer.5": 948.0287, "encoder_q-layer.6": 901.8077, "encoder_q-layer.7": 775.1576, "encoder_q-layer.8": 748.1526, "encoder_q-layer.9": 625.3275, "epoch": 0.21, "inbatch_neg_score": 0.2415, "inbatch_pos_score": 0.7607, "learning_rate": 3.761111111111111e-05, "loss": 4.1233, "norm_diff": 0.025, "norm_loss": 0.0, "num_token_doc": 66.852, "num_token_overlap": 11.6548, "num_token_query": 31.2218, "num_token_union": 65.0528, "num_word_context": 202.4258, "num_word_doc": 49.8933, "num_word_query": 23.1648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2024.5602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2397, "query_norm": 1.3554, "queue_k_norm": 1.3785, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2218, "sent_len_1": 66.852, "sent_len_max_0": 127.5787, "sent_len_max_1": 189.2025, "stdk": 0.0464, "stdq": 0.0428, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.0861, "doc_norm": 1.3855, "encoder_q-embeddings": 1821.1609, "encoder_q-layer.0": 1227.4452, "encoder_q-layer.1": 1262.2211, "encoder_q-layer.10": 709.0712, "encoder_q-layer.11": 1832.2919, "encoder_q-layer.2": 1414.7743, "encoder_q-layer.3": 1438.8816, "encoder_q-layer.4": 1338.6044, "encoder_q-layer.5": 1256.3805, "encoder_q-layer.6": 1241.9481, "encoder_q-layer.7": 1004.4371, "encoder_q-layer.8": 889.8774, "encoder_q-layer.9": 656.2282, "epoch": 0.21, "inbatch_neg_score": 0.2495, "inbatch_pos_score": 0.7856, "learning_rate": 3.7555555555555554e-05, "loss": 4.0861, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.9149, "num_token_overlap": 11.7026, "num_token_query": 31.4686, "num_token_union": 65.2266, "num_word_context": 202.7537, "num_word_doc": 49.9179, "num_word_query": 23.3712, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1981.4824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2496, "query_norm": 1.3751, "queue_k_norm": 1.3761, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4686, "sent_len_1": 66.9149, "sent_len_max_0": 127.3762, "sent_len_max_1": 190.7438, "stdk": 0.0467, "stdq": 0.0436, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 4.0944, "doc_norm": 1.3756, "encoder_q-embeddings": 7286.7939, "encoder_q-layer.0": 5052.3428, "encoder_q-layer.1": 5226.8843, "encoder_q-layer.10": 759.7524, "encoder_q-layer.11": 1810.217, "encoder_q-layer.2": 5822.5327, "encoder_q-layer.3": 4722.4849, "encoder_q-layer.4": 3804.8438, "encoder_q-layer.5": 3390.5847, "encoder_q-layer.6": 2477.8967, "encoder_q-layer.7": 1826.2217, "encoder_q-layer.8": 969.1267, "encoder_q-layer.9": 615.9684, "epoch": 0.21, "inbatch_neg_score": 0.2555, "inbatch_pos_score": 0.7915, "learning_rate": 3.7500000000000003e-05, "loss": 4.0944, "norm_diff": 0.0145, "norm_loss": 0.0, "num_token_doc": 66.7127, "num_token_overlap": 11.6693, "num_token_query": 31.3608, "num_token_union": 65.09, "num_word_context": 202.08, "num_word_doc": 49.7893, "num_word_query": 23.2978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6387.3999, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2556, "query_norm": 1.3613, "queue_k_norm": 1.3773, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3608, "sent_len_1": 66.7127, "sent_len_max_0": 127.5088, "sent_len_max_1": 187.1275, "stdk": 0.0464, "stdq": 0.0431, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.0804, "doc_norm": 1.3781, "encoder_q-embeddings": 1158.8248, "encoder_q-layer.0": 807.3179, "encoder_q-layer.1": 928.02, "encoder_q-layer.10": 699.0783, "encoder_q-layer.11": 1794.0424, "encoder_q-layer.2": 965.1557, "encoder_q-layer.3": 902.7643, "encoder_q-layer.4": 909.7736, "encoder_q-layer.5": 757.8235, "encoder_q-layer.6": 653.4589, "encoder_q-layer.7": 669.9637, "encoder_q-layer.8": 716.4009, "encoder_q-layer.9": 628.9656, "epoch": 0.21, "inbatch_neg_score": 0.2539, "inbatch_pos_score": 0.7754, "learning_rate": 3.7444444444444446e-05, "loss": 4.0804, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 11.6752, "num_token_query": 31.387, "num_token_union": 65.0877, "num_word_context": 202.2777, "num_word_doc": 49.7834, "num_word_query": 23.3083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1487.4113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2549, "query_norm": 1.3501, "queue_k_norm": 1.3802, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.387, "sent_len_1": 66.7492, "sent_len_max_0": 127.5512, "sent_len_max_1": 191.3212, "stdk": 0.0465, "stdq": 0.0424, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0708, "doc_norm": 1.3816, "encoder_q-embeddings": 4073.9922, "encoder_q-layer.0": 2994.4543, "encoder_q-layer.1": 3434.4878, "encoder_q-layer.10": 678.8239, "encoder_q-layer.11": 1878.3557, "encoder_q-layer.2": 3858.2478, "encoder_q-layer.3": 4743.1377, "encoder_q-layer.4": 4396.4824, "encoder_q-layer.5": 3316.804, "encoder_q-layer.6": 2873.9651, "encoder_q-layer.7": 2538.1702, "encoder_q-layer.8": 1873.7632, "encoder_q-layer.9": 659.2944, "epoch": 0.21, "inbatch_neg_score": 0.252, "inbatch_pos_score": 0.792, "learning_rate": 3.738888888888889e-05, "loss": 4.0708, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.5997, "num_token_overlap": 11.6915, "num_token_query": 31.361, "num_token_union": 64.9986, "num_word_context": 202.0793, "num_word_doc": 49.7416, "num_word_query": 23.289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4756.1912, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2512, "query_norm": 1.3674, "queue_k_norm": 1.3769, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.361, "sent_len_1": 66.5997, "sent_len_max_0": 127.5275, "sent_len_max_1": 188.835, "stdk": 0.0465, "stdq": 0.0432, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1156, "doc_norm": 1.3825, "encoder_q-embeddings": 3476.4817, "encoder_q-layer.0": 2402.2036, "encoder_q-layer.1": 2806.3428, "encoder_q-layer.10": 824.5389, "encoder_q-layer.11": 2080.1211, "encoder_q-layer.2": 2749.7441, "encoder_q-layer.3": 2803.3591, "encoder_q-layer.4": 2446.2366, "encoder_q-layer.5": 1948.6329, "encoder_q-layer.6": 1668.8418, "encoder_q-layer.7": 1120.8602, "encoder_q-layer.8": 819.9167, "encoder_q-layer.9": 681.7197, "epoch": 0.21, "inbatch_neg_score": 0.2504, "inbatch_pos_score": 0.7695, "learning_rate": 3.733333333333334e-05, "loss": 4.1156, "norm_diff": 0.0448, "norm_loss": 0.0, "num_token_doc": 66.6245, "num_token_overlap": 11.6509, "num_token_query": 31.2465, "num_token_union": 64.9661, "num_word_context": 201.8723, "num_word_doc": 49.7072, "num_word_query": 23.2083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3390.4532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2512, "query_norm": 1.3377, "queue_k_norm": 1.3802, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2465, "sent_len_1": 66.6245, "sent_len_max_0": 127.5837, "sent_len_max_1": 190.08, "stdk": 0.0466, "stdq": 0.0422, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.1185, "doc_norm": 1.3829, "encoder_q-embeddings": 1318.8136, "encoder_q-layer.0": 919.7171, "encoder_q-layer.1": 1076.6616, "encoder_q-layer.10": 729.9254, "encoder_q-layer.11": 1913.7491, "encoder_q-layer.2": 1189.0952, "encoder_q-layer.3": 1185.1382, "encoder_q-layer.4": 1245.2225, "encoder_q-layer.5": 1112.6731, "encoder_q-layer.6": 839.598, "encoder_q-layer.7": 687.2131, "encoder_q-layer.8": 652.1141, "encoder_q-layer.9": 594.2953, "epoch": 0.21, "inbatch_neg_score": 0.2497, "inbatch_pos_score": 0.7905, "learning_rate": 3.727777777777778e-05, "loss": 4.1185, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.4951, "num_token_overlap": 11.6337, "num_token_query": 31.3501, "num_token_union": 64.974, "num_word_context": 201.9796, "num_word_doc": 49.6353, "num_word_query": 23.2759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1692.7245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2502, "query_norm": 1.3418, "queue_k_norm": 1.3805, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3501, "sent_len_1": 66.4951, "sent_len_max_0": 127.3925, "sent_len_max_1": 187.975, "stdk": 0.0466, "stdq": 0.0426, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0671, "doc_norm": 1.3824, "encoder_q-embeddings": 4313.6675, "encoder_q-layer.0": 3169.3315, "encoder_q-layer.1": 2928.7668, "encoder_q-layer.10": 698.9973, "encoder_q-layer.11": 1934.2053, "encoder_q-layer.2": 3287.8196, "encoder_q-layer.3": 3217.218, "encoder_q-layer.4": 3011.99, "encoder_q-layer.5": 2913.8228, "encoder_q-layer.6": 2013.261, "encoder_q-layer.7": 1179.6694, "encoder_q-layer.8": 946.0956, "encoder_q-layer.9": 714.3349, "epoch": 0.21, "inbatch_neg_score": 0.2472, "inbatch_pos_score": 0.7637, "learning_rate": 3.722222222222222e-05, "loss": 4.0671, "norm_diff": 0.0655, "norm_loss": 0.0, "num_token_doc": 67.0953, "num_token_overlap": 11.7289, "num_token_query": 31.3983, "num_token_union": 65.3126, "num_word_context": 202.8317, "num_word_doc": 50.0776, "num_word_query": 23.3189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4065.8594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2467, "query_norm": 1.3169, "queue_k_norm": 1.3813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3983, "sent_len_1": 67.0953, "sent_len_max_0": 127.3113, "sent_len_max_1": 188.5475, "stdk": 0.0466, "stdq": 0.0415, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.0804, "doc_norm": 1.3758, "encoder_q-embeddings": 5817.2651, "encoder_q-layer.0": 3939.657, "encoder_q-layer.1": 3862.8425, "encoder_q-layer.10": 675.1337, "encoder_q-layer.11": 1926.5685, "encoder_q-layer.2": 4673.5684, "encoder_q-layer.3": 5165.2373, "encoder_q-layer.4": 5748.624, "encoder_q-layer.5": 5015.3735, "encoder_q-layer.6": 5180.8105, "encoder_q-layer.7": 3881.4905, "encoder_q-layer.8": 2488.5972, "encoder_q-layer.9": 758.3554, "epoch": 0.22, "inbatch_neg_score": 0.2589, "inbatch_pos_score": 0.8003, "learning_rate": 3.7166666666666664e-05, "loss": 4.0804, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.843, "num_token_overlap": 11.7329, "num_token_query": 31.4957, "num_token_union": 65.2073, "num_word_context": 202.752, "num_word_doc": 49.8875, "num_word_query": 23.4247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6443.4687, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2576, "query_norm": 1.3584, "queue_k_norm": 1.3785, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4957, "sent_len_1": 66.843, "sent_len_max_0": 127.6775, "sent_len_max_1": 189.095, "stdk": 0.0463, "stdq": 0.0428, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.1175, "doc_norm": 1.3763, "encoder_q-embeddings": 5670.4189, "encoder_q-layer.0": 4051.9526, "encoder_q-layer.1": 4430.8384, "encoder_q-layer.10": 728.8358, "encoder_q-layer.11": 1954.8544, "encoder_q-layer.2": 4604.5884, "encoder_q-layer.3": 4591.6733, "encoder_q-layer.4": 4751.9194, "encoder_q-layer.5": 4784.8379, "encoder_q-layer.6": 4571.48, "encoder_q-layer.7": 3699.8381, "encoder_q-layer.8": 2087.9412, "encoder_q-layer.9": 732.1857, "epoch": 0.22, "inbatch_neg_score": 0.2433, "inbatch_pos_score": 0.749, "learning_rate": 3.7111111111111113e-05, "loss": 4.1175, "norm_diff": 0.0456, "norm_loss": 0.0, "num_token_doc": 66.7755, "num_token_overlap": 11.6644, "num_token_query": 31.4379, "num_token_union": 65.1462, "num_word_context": 201.9261, "num_word_doc": 49.8358, "num_word_query": 23.3589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6078.246, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2433, "query_norm": 1.3307, "queue_k_norm": 1.3808, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4379, "sent_len_1": 66.7755, "sent_len_max_0": 127.605, "sent_len_max_1": 189.8713, "stdk": 0.0463, "stdq": 0.0423, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.088, "doc_norm": 1.379, "encoder_q-embeddings": 5490.9077, "encoder_q-layer.0": 4219.6162, "encoder_q-layer.1": 4466.1362, "encoder_q-layer.10": 692.2649, "encoder_q-layer.11": 2070.8435, "encoder_q-layer.2": 4648.2007, "encoder_q-layer.3": 4380.7646, "encoder_q-layer.4": 3932.9492, "encoder_q-layer.5": 3716.0342, "encoder_q-layer.6": 3021.363, "encoder_q-layer.7": 2065.9705, "encoder_q-layer.8": 1153.4994, "encoder_q-layer.9": 654.0674, "epoch": 0.22, "inbatch_neg_score": 0.2549, "inbatch_pos_score": 0.7764, "learning_rate": 3.705555555555556e-05, "loss": 4.088, "norm_diff": 0.0324, "norm_loss": 0.0, "num_token_doc": 66.742, "num_token_overlap": 11.6549, "num_token_query": 31.2931, "num_token_union": 65.0847, "num_word_context": 201.9967, "num_word_doc": 49.8071, "num_word_query": 23.2407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5382.8159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.3499, "queue_k_norm": 1.3792, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2931, "sent_len_1": 66.742, "sent_len_max_0": 127.5, "sent_len_max_1": 190.0737, "stdk": 0.0465, "stdq": 0.0427, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0838, "doc_norm": 1.3775, "encoder_q-embeddings": 1880.5784, "encoder_q-layer.0": 1420.2755, "encoder_q-layer.1": 1358.424, "encoder_q-layer.10": 681.4366, "encoder_q-layer.11": 1981.562, "encoder_q-layer.2": 1577.8578, "encoder_q-layer.3": 1673.3958, "encoder_q-layer.4": 1659.9033, "encoder_q-layer.5": 1571.5325, "encoder_q-layer.6": 1297.618, "encoder_q-layer.7": 1144.0826, "encoder_q-layer.8": 847.2464, "encoder_q-layer.9": 630.0326, "epoch": 0.22, "inbatch_neg_score": 0.2544, "inbatch_pos_score": 0.8091, "learning_rate": 3.7e-05, "loss": 4.0838, "norm_diff": 0.0162, "norm_loss": 0.0, "num_token_doc": 66.6546, "num_token_overlap": 11.6886, "num_token_query": 31.4101, "num_token_union": 65.1169, "num_word_context": 202.1371, "num_word_doc": 49.7878, "num_word_query": 23.3348, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2208.9541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2551, "query_norm": 1.3657, "queue_k_norm": 1.3765, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4101, "sent_len_1": 66.6546, "sent_len_max_0": 127.545, "sent_len_max_1": 187.885, "stdk": 0.0464, "stdq": 0.0434, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1017, "doc_norm": 1.3798, "encoder_q-embeddings": 1602.8682, "encoder_q-layer.0": 1134.5543, "encoder_q-layer.1": 1273.127, "encoder_q-layer.10": 788.9916, "encoder_q-layer.11": 2019.9324, "encoder_q-layer.2": 1548.9019, "encoder_q-layer.3": 1591.833, "encoder_q-layer.4": 1568.1416, "encoder_q-layer.5": 1631.5544, "encoder_q-layer.6": 1308.3524, "encoder_q-layer.7": 1074.6385, "encoder_q-layer.8": 833.1846, "encoder_q-layer.9": 661.9211, "epoch": 0.22, "inbatch_neg_score": 0.2539, "inbatch_pos_score": 0.7852, "learning_rate": 3.694444444444445e-05, "loss": 4.1017, "norm_diff": 0.0278, "norm_loss": 0.0, "num_token_doc": 66.7745, "num_token_overlap": 11.6787, "num_token_query": 31.4008, "num_token_union": 65.0542, "num_word_context": 202.1087, "num_word_doc": 49.7871, "num_word_query": 23.3042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2527, "query_norm": 1.352, "queue_k_norm": 1.3789, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4008, "sent_len_1": 66.7745, "sent_len_max_0": 127.445, "sent_len_max_1": 190.1425, "stdk": 0.0465, "stdq": 0.0433, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.0728, "doc_norm": 1.3751, "encoder_q-embeddings": 2040.0721, "encoder_q-layer.0": 1468.0109, "encoder_q-layer.1": 1532.349, "encoder_q-layer.10": 873.1898, "encoder_q-layer.11": 2019.4478, "encoder_q-layer.2": 1613.4524, "encoder_q-layer.3": 1290.4061, "encoder_q-layer.4": 1142.4812, "encoder_q-layer.5": 976.2729, "encoder_q-layer.6": 946.4236, "encoder_q-layer.7": 1000.4654, "encoder_q-layer.8": 852.668, "encoder_q-layer.9": 704.6058, "epoch": 0.22, "inbatch_neg_score": 0.2594, "inbatch_pos_score": 0.7979, "learning_rate": 3.688888888888889e-05, "loss": 4.0728, "norm_diff": 0.0216, "norm_loss": 0.0, "num_token_doc": 66.8367, "num_token_overlap": 11.7114, "num_token_query": 31.3042, "num_token_union": 65.0714, "num_word_context": 202.061, "num_word_doc": 49.8774, "num_word_query": 23.2363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.5567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2588, "query_norm": 1.3549, "queue_k_norm": 1.379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3042, "sent_len_1": 66.8367, "sent_len_max_0": 127.365, "sent_len_max_1": 188.8537, "stdk": 0.0463, "stdq": 0.0431, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.0946, "doc_norm": 1.3742, "encoder_q-embeddings": 1030.1953, "encoder_q-layer.0": 711.6225, "encoder_q-layer.1": 784.251, "encoder_q-layer.10": 744.9375, "encoder_q-layer.11": 1973.5063, "encoder_q-layer.2": 902.7331, "encoder_q-layer.3": 931.4564, "encoder_q-layer.4": 893.5768, "encoder_q-layer.5": 780.6956, "encoder_q-layer.6": 839.1551, "encoder_q-layer.7": 831.1851, "encoder_q-layer.8": 769.4886, "encoder_q-layer.9": 649.7961, "epoch": 0.22, "inbatch_neg_score": 0.2589, "inbatch_pos_score": 0.7764, "learning_rate": 3.683333333333334e-05, "loss": 4.0946, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 66.8005, "num_token_overlap": 11.6524, "num_token_query": 31.3492, "num_token_union": 65.1527, "num_word_context": 202.1816, "num_word_doc": 49.842, "num_word_query": 23.279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1486.6797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2581, "query_norm": 1.3304, "queue_k_norm": 1.3804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3492, "sent_len_1": 66.8005, "sent_len_max_0": 127.2262, "sent_len_max_1": 189.0288, "stdk": 0.0463, "stdq": 0.0426, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.082, "doc_norm": 1.3779, "encoder_q-embeddings": 817.3168, "encoder_q-layer.0": 573.571, "encoder_q-layer.1": 617.2898, "encoder_q-layer.10": 692.4132, "encoder_q-layer.11": 1885.2465, "encoder_q-layer.2": 684.3392, "encoder_q-layer.3": 697.1458, "encoder_q-layer.4": 723.1657, "encoder_q-layer.5": 672.608, "encoder_q-layer.6": 723.8884, "encoder_q-layer.7": 698.5551, "encoder_q-layer.8": 684.364, "encoder_q-layer.9": 601.1967, "epoch": 0.22, "inbatch_neg_score": 0.2559, "inbatch_pos_score": 0.7681, "learning_rate": 3.677777777777778e-05, "loss": 4.082, "norm_diff": 0.0587, "norm_loss": 0.0, "num_token_doc": 66.5344, "num_token_overlap": 11.6601, "num_token_query": 31.2988, "num_token_union": 64.929, "num_word_context": 202.084, "num_word_doc": 49.6458, "num_word_query": 23.2484, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1322.6162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2563, "query_norm": 1.3192, "queue_k_norm": 1.3794, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2988, "sent_len_1": 66.5344, "sent_len_max_0": 127.4862, "sent_len_max_1": 189.5425, "stdk": 0.0464, "stdq": 0.0419, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.0703, "doc_norm": 1.375, "encoder_q-embeddings": 1227.103, "encoder_q-layer.0": 858.7325, "encoder_q-layer.1": 838.6816, "encoder_q-layer.10": 723.438, "encoder_q-layer.11": 1971.4904, "encoder_q-layer.2": 923.6601, "encoder_q-layer.3": 967.3943, "encoder_q-layer.4": 1037.0055, "encoder_q-layer.5": 929.7004, "encoder_q-layer.6": 881.7344, "encoder_q-layer.7": 909.8853, "encoder_q-layer.8": 765.2469, "encoder_q-layer.9": 634.8801, "epoch": 0.22, "inbatch_neg_score": 0.2581, "inbatch_pos_score": 0.7949, "learning_rate": 3.672222222222222e-05, "loss": 4.0703, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.6265, "num_token_overlap": 11.7181, "num_token_query": 31.4454, "num_token_union": 65.0755, "num_word_context": 202.3136, "num_word_doc": 49.7137, "num_word_query": 23.3428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1586.7812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2566, "query_norm": 1.3425, "queue_k_norm": 1.3806, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4454, "sent_len_1": 66.6265, "sent_len_max_0": 127.4925, "sent_len_max_1": 188.5687, "stdk": 0.0463, "stdq": 0.0431, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.0861, "doc_norm": 1.3829, "encoder_q-embeddings": 889.6326, "encoder_q-layer.0": 669.8805, "encoder_q-layer.1": 711.1403, "encoder_q-layer.10": 699.9227, "encoder_q-layer.11": 1889.978, "encoder_q-layer.2": 870.8131, "encoder_q-layer.3": 896.6808, "encoder_q-layer.4": 871.8403, "encoder_q-layer.5": 849.8658, "encoder_q-layer.6": 813.5897, "encoder_q-layer.7": 729.3642, "encoder_q-layer.8": 734.6885, "encoder_q-layer.9": 626.6133, "epoch": 0.22, "inbatch_neg_score": 0.2546, "inbatch_pos_score": 0.8091, "learning_rate": 3.6666666666666666e-05, "loss": 4.0861, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.8746, "num_token_overlap": 11.6694, "num_token_query": 31.2369, "num_token_union": 65.0786, "num_word_context": 202.1215, "num_word_doc": 49.8902, "num_word_query": 23.2051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1417.0524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2556, "query_norm": 1.344, "queue_k_norm": 1.3818, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2369, "sent_len_1": 66.8746, "sent_len_max_0": 127.3713, "sent_len_max_1": 189.5163, "stdk": 0.0466, "stdq": 0.043, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 4.1, "doc_norm": 1.3848, "encoder_q-embeddings": 3320.7815, "encoder_q-layer.0": 2299.2798, "encoder_q-layer.1": 2637.6492, "encoder_q-layer.10": 753.2044, "encoder_q-layer.11": 1863.8247, "encoder_q-layer.2": 2768.374, "encoder_q-layer.3": 2855.2747, "encoder_q-layer.4": 2881.0029, "encoder_q-layer.5": 2892.1824, "encoder_q-layer.6": 2862.8379, "encoder_q-layer.7": 2378.4373, "encoder_q-layer.8": 1802.2571, "encoder_q-layer.9": 718.5524, "epoch": 0.22, "inbatch_neg_score": 0.254, "inbatch_pos_score": 0.7969, "learning_rate": 3.6611111111111115e-05, "loss": 4.1, "norm_diff": 0.051, "norm_loss": 0.0, "num_token_doc": 66.7259, "num_token_overlap": 11.6409, "num_token_query": 31.3241, "num_token_union": 65.0991, "num_word_context": 202.3155, "num_word_doc": 49.7819, "num_word_query": 23.263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3638.3259, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2524, "query_norm": 1.3338, "queue_k_norm": 1.38, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3241, "sent_len_1": 66.7259, "sent_len_max_0": 127.385, "sent_len_max_1": 190.3875, "stdk": 0.0466, "stdq": 0.0426, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.1035, "doc_norm": 1.3782, "encoder_q-embeddings": 4228.3726, "encoder_q-layer.0": 3025.5864, "encoder_q-layer.1": 3403.4116, "encoder_q-layer.10": 698.426, "encoder_q-layer.11": 1903.1387, "encoder_q-layer.2": 3662.01, "encoder_q-layer.3": 3835.3577, "encoder_q-layer.4": 4544.3198, "encoder_q-layer.5": 3653.8284, "encoder_q-layer.6": 3301.8613, "encoder_q-layer.7": 3269.5801, "encoder_q-layer.8": 2455.53, "encoder_q-layer.9": 700.6462, "epoch": 0.22, "inbatch_neg_score": 0.2512, "inbatch_pos_score": 0.7739, "learning_rate": 3.655555555555556e-05, "loss": 4.1035, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 11.6003, "num_token_query": 31.2505, "num_token_union": 65.0781, "num_word_context": 202.6269, "num_word_doc": 49.8412, "num_word_query": 23.2021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4863.3391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.327, "queue_k_norm": 1.3807, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2505, "sent_len_1": 66.7492, "sent_len_max_0": 127.5662, "sent_len_max_1": 188.4288, "stdk": 0.0464, "stdq": 0.0425, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.0578, "doc_norm": 1.38, "encoder_q-embeddings": 6877.6714, "encoder_q-layer.0": 4866.5044, "encoder_q-layer.1": 5048.7378, "encoder_q-layer.10": 1336.6136, "encoder_q-layer.11": 3495.2888, "encoder_q-layer.2": 5412.6079, "encoder_q-layer.3": 5147.5244, "encoder_q-layer.4": 4847.2432, "encoder_q-layer.5": 4752.3257, "encoder_q-layer.6": 3840.8381, "encoder_q-layer.7": 3138.3945, "encoder_q-layer.8": 2088.2549, "encoder_q-layer.9": 1243.7762, "epoch": 0.22, "inbatch_neg_score": 0.2504, "inbatch_pos_score": 0.7856, "learning_rate": 3.65e-05, "loss": 4.0578, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.8589, "num_token_overlap": 11.719, "num_token_query": 31.3976, "num_token_union": 65.1742, "num_word_context": 202.1702, "num_word_doc": 49.8643, "num_word_query": 23.3085, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6646.9138, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2494, "query_norm": 1.3252, "queue_k_norm": 1.3804, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3976, "sent_len_1": 66.8589, "sent_len_max_0": 127.29, "sent_len_max_1": 190.98, "stdk": 0.0465, "stdq": 0.0425, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0717, "doc_norm": 1.38, "encoder_q-embeddings": 3000.4553, "encoder_q-layer.0": 2004.3044, "encoder_q-layer.1": 2166.6812, "encoder_q-layer.10": 1313.5217, "encoder_q-layer.11": 3561.7336, "encoder_q-layer.2": 2336.8762, "encoder_q-layer.3": 2484.781, "encoder_q-layer.4": 2527.1965, "encoder_q-layer.5": 2286.3125, "encoder_q-layer.6": 2471.6277, "encoder_q-layer.7": 2225.978, "encoder_q-layer.8": 2087.061, "encoder_q-layer.9": 1240.8113, "epoch": 0.22, "inbatch_neg_score": 0.2464, "inbatch_pos_score": 0.7656, "learning_rate": 3.644444444444445e-05, "loss": 4.0717, "norm_diff": 0.0672, "norm_loss": 0.0, "num_token_doc": 67.0153, "num_token_overlap": 11.7355, "num_token_query": 31.465, "num_token_union": 65.2875, "num_word_context": 202.4209, "num_word_doc": 50.0037, "num_word_query": 23.3612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3638.6434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2474, "query_norm": 1.3128, "queue_k_norm": 1.381, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.465, "sent_len_1": 67.0153, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.0325, "stdk": 0.0465, "stdq": 0.0421, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.086, "doc_norm": 1.3771, "encoder_q-embeddings": 8270.9961, "encoder_q-layer.0": 5430.3354, "encoder_q-layer.1": 6103.9287, "encoder_q-layer.10": 1377.5358, "encoder_q-layer.11": 3970.8972, "encoder_q-layer.2": 6646.3936, "encoder_q-layer.3": 7273.3706, "encoder_q-layer.4": 7221.1914, "encoder_q-layer.5": 7327.6484, "encoder_q-layer.6": 7153.3125, "encoder_q-layer.7": 6915.0562, "encoder_q-layer.8": 3877.2026, "encoder_q-layer.9": 1404.4496, "epoch": 0.22, "inbatch_neg_score": 0.2425, "inbatch_pos_score": 0.7598, "learning_rate": 3.638888888888889e-05, "loss": 4.086, "norm_diff": 0.0605, "norm_loss": 0.0, "num_token_doc": 66.76, "num_token_overlap": 11.6699, "num_token_query": 31.2889, "num_token_union": 65.0654, "num_word_context": 202.0466, "num_word_doc": 49.8212, "num_word_query": 23.2413, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9278.122, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2413, "query_norm": 1.3166, "queue_k_norm": 1.3825, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2889, "sent_len_1": 66.76, "sent_len_max_0": 127.425, "sent_len_max_1": 189.1912, "stdk": 0.0463, "stdq": 0.0422, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.0833, "doc_norm": 1.3718, "encoder_q-embeddings": 13494.582, "encoder_q-layer.0": 9812.6162, "encoder_q-layer.1": 10711.1221, "encoder_q-layer.10": 1305.213, "encoder_q-layer.11": 3795.053, "encoder_q-layer.2": 11732.2891, "encoder_q-layer.3": 11329.6211, "encoder_q-layer.4": 8158.8022, "encoder_q-layer.5": 5000.9248, "encoder_q-layer.6": 3765.7119, "encoder_q-layer.7": 3073.7173, "encoder_q-layer.8": 2465.6682, "encoder_q-layer.9": 1256.9591, "epoch": 0.23, "inbatch_neg_score": 0.2394, "inbatch_pos_score": 0.75, "learning_rate": 3.633333333333333e-05, "loss": 4.0833, "norm_diff": 0.0689, "norm_loss": 0.0, "num_token_doc": 66.7037, "num_token_overlap": 11.7007, "num_token_query": 31.5018, "num_token_union": 65.1076, "num_word_context": 202.284, "num_word_doc": 49.7796, "num_word_query": 23.4056, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12433.9367, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.24, "query_norm": 1.3029, "queue_k_norm": 1.3806, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5018, "sent_len_1": 66.7037, "sent_len_max_0": 127.6325, "sent_len_max_1": 189.2875, "stdk": 0.0462, "stdq": 0.0415, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.0685, "doc_norm": 1.3807, "encoder_q-embeddings": 5805.6807, "encoder_q-layer.0": 4047.2019, "encoder_q-layer.1": 4419.5645, "encoder_q-layer.10": 1403.5437, "encoder_q-layer.11": 3797.8127, "encoder_q-layer.2": 4942.7056, "encoder_q-layer.3": 5300.3037, "encoder_q-layer.4": 5655.0415, "encoder_q-layer.5": 5722.397, "encoder_q-layer.6": 6029.1113, "encoder_q-layer.7": 5430.022, "encoder_q-layer.8": 3397.9468, "encoder_q-layer.9": 1417.458, "epoch": 0.23, "inbatch_neg_score": 0.2411, "inbatch_pos_score": 0.7607, "learning_rate": 3.6277777777777776e-05, "loss": 4.0685, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.8534, "num_token_overlap": 11.6613, "num_token_query": 31.3145, "num_token_union": 65.1644, "num_word_context": 202.5943, "num_word_doc": 49.9005, "num_word_query": 23.2743, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7037.5878, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2408, "query_norm": 1.3328, "queue_k_norm": 1.3811, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3145, "sent_len_1": 66.8534, "sent_len_max_0": 127.4613, "sent_len_max_1": 188.8925, "stdk": 0.0465, "stdq": 0.0424, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.0687, "doc_norm": 1.3827, "encoder_q-embeddings": 6621.9702, "encoder_q-layer.0": 4593.8213, "encoder_q-layer.1": 5056.0278, "encoder_q-layer.10": 1452.9279, "encoder_q-layer.11": 3720.0542, "encoder_q-layer.2": 5209.9902, "encoder_q-layer.3": 4768.6479, "encoder_q-layer.4": 4869.4507, "encoder_q-layer.5": 4753.1509, "encoder_q-layer.6": 5059.4902, "encoder_q-layer.7": 4498.1265, "encoder_q-layer.8": 3391.1199, "encoder_q-layer.9": 1346.4835, "epoch": 0.23, "inbatch_neg_score": 0.2384, "inbatch_pos_score": 0.7676, "learning_rate": 3.6222222222222225e-05, "loss": 4.0687, "norm_diff": 0.0641, "norm_loss": 0.0, "num_token_doc": 66.7199, "num_token_overlap": 11.7262, "num_token_query": 31.5226, "num_token_union": 65.148, "num_word_context": 202.1116, "num_word_doc": 49.7636, "num_word_query": 23.4356, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6861.4027, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2373, "query_norm": 1.3186, "queue_k_norm": 1.3823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5226, "sent_len_1": 66.7199, "sent_len_max_0": 127.2575, "sent_len_max_1": 190.4512, "stdk": 0.0466, "stdq": 0.0418, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.0678, "doc_norm": 1.3811, "encoder_q-embeddings": 2327.8, "encoder_q-layer.0": 1524.8473, "encoder_q-layer.1": 1723.8741, "encoder_q-layer.10": 1257.0801, "encoder_q-layer.11": 3556.3311, "encoder_q-layer.2": 2011.5084, "encoder_q-layer.3": 2079.2598, "encoder_q-layer.4": 2192.1555, "encoder_q-layer.5": 2106.4575, "encoder_q-layer.6": 1843.7063, "encoder_q-layer.7": 1768.3318, "encoder_q-layer.8": 1523.3767, "encoder_q-layer.9": 1189.9155, "epoch": 0.23, "inbatch_neg_score": 0.2302, "inbatch_pos_score": 0.7773, "learning_rate": 3.6166666666666674e-05, "loss": 4.0678, "norm_diff": 0.051, "norm_loss": 0.0, "num_token_doc": 66.8101, "num_token_overlap": 11.6533, "num_token_query": 31.2632, "num_token_union": 65.0973, "num_word_context": 202.3502, "num_word_doc": 49.8514, "num_word_query": 23.2144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3119.7112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2297, "query_norm": 1.33, "queue_k_norm": 1.382, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2632, "sent_len_1": 66.8101, "sent_len_max_0": 127.4875, "sent_len_max_1": 187.9375, "stdk": 0.0465, "stdq": 0.0424, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.0465, "doc_norm": 1.372, "encoder_q-embeddings": 2846.583, "encoder_q-layer.0": 1874.5806, "encoder_q-layer.1": 2064.9146, "encoder_q-layer.10": 1327.8005, "encoder_q-layer.11": 3474.4426, "encoder_q-layer.2": 2408.6511, "encoder_q-layer.3": 2621.7764, "encoder_q-layer.4": 2575.8464, "encoder_q-layer.5": 2733.0437, "encoder_q-layer.6": 2633.2756, "encoder_q-layer.7": 2324.5396, "encoder_q-layer.8": 2011.7058, "encoder_q-layer.9": 1191.6749, "epoch": 0.23, "inbatch_neg_score": 0.2293, "inbatch_pos_score": 0.7432, "learning_rate": 3.611111111111111e-05, "loss": 4.0465, "norm_diff": 0.0434, "norm_loss": 0.0, "num_token_doc": 66.6403, "num_token_overlap": 11.668, "num_token_query": 31.4506, "num_token_union": 65.1081, "num_word_context": 202.2722, "num_word_doc": 49.7613, "num_word_query": 23.362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3653.9704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2286, "query_norm": 1.3286, "queue_k_norm": 1.3811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4506, "sent_len_1": 66.6403, "sent_len_max_0": 127.4325, "sent_len_max_1": 190.3237, "stdk": 0.0462, "stdq": 0.0424, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.0752, "doc_norm": 1.3783, "encoder_q-embeddings": 3839.8552, "encoder_q-layer.0": 2720.1506, "encoder_q-layer.1": 2912.5378, "encoder_q-layer.10": 1411.3204, "encoder_q-layer.11": 3421.7632, "encoder_q-layer.2": 3064.4702, "encoder_q-layer.3": 3234.9827, "encoder_q-layer.4": 3349.0303, "encoder_q-layer.5": 3500.8591, "encoder_q-layer.6": 2772.4365, "encoder_q-layer.7": 2508.6873, "encoder_q-layer.8": 1896.3309, "encoder_q-layer.9": 1337.2397, "epoch": 0.23, "inbatch_neg_score": 0.222, "inbatch_pos_score": 0.7676, "learning_rate": 3.605555555555556e-05, "loss": 4.0752, "norm_diff": 0.0442, "norm_loss": 0.0, "num_token_doc": 66.8227, "num_token_overlap": 11.7015, "num_token_query": 31.3416, "num_token_union": 65.0846, "num_word_context": 201.974, "num_word_doc": 49.8664, "num_word_query": 23.2772, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4393.7095, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2209, "query_norm": 1.3341, "queue_k_norm": 1.3808, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3416, "sent_len_1": 66.8227, "sent_len_max_0": 127.465, "sent_len_max_1": 190.5213, "stdk": 0.0465, "stdq": 0.0426, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0572, "doc_norm": 1.3814, "encoder_q-embeddings": 1621.5914, "encoder_q-layer.0": 1041.5555, "encoder_q-layer.1": 1011.6263, "encoder_q-layer.10": 1303.8867, "encoder_q-layer.11": 3304.6208, "encoder_q-layer.2": 1045.9265, "encoder_q-layer.3": 1050.4833, "encoder_q-layer.4": 1088.1382, "encoder_q-layer.5": 1137.3654, "encoder_q-layer.6": 1170.0232, "encoder_q-layer.7": 1199.0432, "encoder_q-layer.8": 1380.2542, "encoder_q-layer.9": 1166.563, "epoch": 0.23, "inbatch_neg_score": 0.229, "inbatch_pos_score": 0.7437, "learning_rate": 3.6e-05, "loss": 4.0572, "norm_diff": 0.0619, "norm_loss": 0.0, "num_token_doc": 66.7485, "num_token_overlap": 11.7138, "num_token_query": 31.4021, "num_token_union": 65.0619, "num_word_context": 201.8991, "num_word_doc": 49.7647, "num_word_query": 23.2979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2324.4464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2269, "query_norm": 1.3195, "queue_k_norm": 1.3773, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4021, "sent_len_1": 66.7485, "sent_len_max_0": 127.4463, "sent_len_max_1": 191.3625, "stdk": 0.0466, "stdq": 0.0417, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0819, "doc_norm": 1.3824, "encoder_q-embeddings": 3369.1807, "encoder_q-layer.0": 2539.0754, "encoder_q-layer.1": 2779.5747, "encoder_q-layer.10": 1301.7344, "encoder_q-layer.11": 3513.2021, "encoder_q-layer.2": 2968.0759, "encoder_q-layer.3": 3058.1033, "encoder_q-layer.4": 3055.0908, "encoder_q-layer.5": 2750.9551, "encoder_q-layer.6": 2197.3079, "encoder_q-layer.7": 2014.9684, "encoder_q-layer.8": 1747.6464, "encoder_q-layer.9": 1166.6902, "epoch": 0.23, "inbatch_neg_score": 0.218, "inbatch_pos_score": 0.7363, "learning_rate": 3.594444444444445e-05, "loss": 4.0819, "norm_diff": 0.0733, "norm_loss": 0.0, "num_token_doc": 66.9864, "num_token_overlap": 11.6343, "num_token_query": 31.2018, "num_token_union": 65.1926, "num_word_context": 202.5251, "num_word_doc": 49.9905, "num_word_query": 23.1758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3995.9734, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.218, "query_norm": 1.3091, "queue_k_norm": 1.3791, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2018, "sent_len_1": 66.9864, "sent_len_max_0": 127.51, "sent_len_max_1": 190.3925, "stdk": 0.0467, "stdq": 0.0417, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.0696, "doc_norm": 1.3859, "encoder_q-embeddings": 2392.635, "encoder_q-layer.0": 1575.5985, "encoder_q-layer.1": 1704.625, "encoder_q-layer.10": 1320.6346, "encoder_q-layer.11": 3602.8069, "encoder_q-layer.2": 1977.8878, "encoder_q-layer.3": 2179.9976, "encoder_q-layer.4": 2401.9993, "encoder_q-layer.5": 2425.6882, "encoder_q-layer.6": 2546.678, "encoder_q-layer.7": 2465.061, "encoder_q-layer.8": 2216.2717, "encoder_q-layer.9": 1323.9381, "epoch": 0.23, "inbatch_neg_score": 0.2183, "inbatch_pos_score": 0.7388, "learning_rate": 3.5888888888888886e-05, "loss": 4.0696, "norm_diff": 0.0648, "norm_loss": 0.0, "num_token_doc": 66.5953, "num_token_overlap": 11.661, "num_token_query": 31.3198, "num_token_union": 64.9694, "num_word_context": 201.9995, "num_word_doc": 49.6868, "num_word_query": 23.2465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3439.3125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2186, "query_norm": 1.3211, "queue_k_norm": 1.3779, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3198, "sent_len_1": 66.5953, "sent_len_max_0": 127.29, "sent_len_max_1": 188.8113, "stdk": 0.0469, "stdq": 0.0425, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0767, "doc_norm": 1.3855, "encoder_q-embeddings": 7042.3022, "encoder_q-layer.0": 5162.0088, "encoder_q-layer.1": 5053.5581, "encoder_q-layer.10": 1264.098, "encoder_q-layer.11": 3230.1143, "encoder_q-layer.2": 5268.0737, "encoder_q-layer.3": 4281.751, "encoder_q-layer.4": 3447.3083, "encoder_q-layer.5": 3280.4836, "encoder_q-layer.6": 2622.1995, "encoder_q-layer.7": 2323.7629, "encoder_q-layer.8": 1724.819, "encoder_q-layer.9": 1224.7439, "epoch": 0.23, "inbatch_neg_score": 0.2116, "inbatch_pos_score": 0.7422, "learning_rate": 3.5833333333333335e-05, "loss": 4.0767, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.7718, "num_token_overlap": 11.6802, "num_token_query": 31.3329, "num_token_union": 65.1267, "num_word_context": 202.4344, "num_word_doc": 49.8556, "num_word_query": 23.2587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6308.3505, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2134, "query_norm": 1.31, "queue_k_norm": 1.3788, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3329, "sent_len_1": 66.7718, "sent_len_max_0": 127.3063, "sent_len_max_1": 188.0625, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.0461, "doc_norm": 1.3858, "encoder_q-embeddings": 2161.8813, "encoder_q-layer.0": 1567.4316, "encoder_q-layer.1": 1593.2759, "encoder_q-layer.10": 1184.687, "encoder_q-layer.11": 3172.5444, "encoder_q-layer.2": 1659.4653, "encoder_q-layer.3": 1557.2136, "encoder_q-layer.4": 1486.9976, "encoder_q-layer.5": 1411.251, "encoder_q-layer.6": 1323.2715, "encoder_q-layer.7": 1316.3331, "encoder_q-layer.8": 1447.3693, "encoder_q-layer.9": 1178.772, "epoch": 0.23, "inbatch_neg_score": 0.2151, "inbatch_pos_score": 0.7593, "learning_rate": 3.577777777777778e-05, "loss": 4.0461, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.9908, "num_token_overlap": 11.7252, "num_token_query": 31.4673, "num_token_union": 65.2655, "num_word_context": 202.3076, "num_word_doc": 49.9577, "num_word_query": 23.3596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2656.23, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.214, "query_norm": 1.3105, "queue_k_norm": 1.3795, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4673, "sent_len_1": 66.9908, "sent_len_max_0": 127.4737, "sent_len_max_1": 190.9275, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.066, "doc_norm": 1.3748, "encoder_q-embeddings": 1729.5399, "encoder_q-layer.0": 1264.9022, "encoder_q-layer.1": 1339.3571, "encoder_q-layer.10": 1316.4666, "encoder_q-layer.11": 3482.2456, "encoder_q-layer.2": 1521.1575, "encoder_q-layer.3": 1521.2693, "encoder_q-layer.4": 1370.6104, "encoder_q-layer.5": 1363.1489, "encoder_q-layer.6": 1335.1396, "encoder_q-layer.7": 1314.26, "encoder_q-layer.8": 1438.3325, "encoder_q-layer.9": 1211.694, "epoch": 0.23, "inbatch_neg_score": 0.2184, "inbatch_pos_score": 0.7505, "learning_rate": 3.5722222222222226e-05, "loss": 4.066, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 66.5661, "num_token_overlap": 11.6729, "num_token_query": 31.4071, "num_token_union": 64.9946, "num_word_context": 202.4627, "num_word_doc": 49.6617, "num_word_query": 23.3347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2540.3959, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2191, "query_norm": 1.2984, "queue_k_norm": 1.3778, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4071, "sent_len_1": 66.5661, "sent_len_max_0": 127.3175, "sent_len_max_1": 190.635, "stdk": 0.0465, "stdq": 0.0419, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 4.0608, "doc_norm": 1.3786, "encoder_q-embeddings": 2831.2573, "encoder_q-layer.0": 1928.7786, "encoder_q-layer.1": 1956.2932, "encoder_q-layer.10": 1281.7391, "encoder_q-layer.11": 3388.7158, "encoder_q-layer.2": 2229.4507, "encoder_q-layer.3": 2179.1487, "encoder_q-layer.4": 2049.9819, "encoder_q-layer.5": 1913.8939, "encoder_q-layer.6": 1711.5283, "encoder_q-layer.7": 1598.856, "encoder_q-layer.8": 1537.4749, "encoder_q-layer.9": 1173.6001, "epoch": 0.23, "inbatch_neg_score": 0.2218, "inbatch_pos_score": 0.7646, "learning_rate": 3.566666666666667e-05, "loss": 4.0608, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.8274, "num_token_overlap": 11.7001, "num_token_query": 31.4513, "num_token_union": 65.2247, "num_word_context": 202.3601, "num_word_doc": 49.8563, "num_word_query": 23.3697, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3211.4714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2203, "query_norm": 1.3274, "queue_k_norm": 1.3801, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4513, "sent_len_1": 66.8274, "sent_len_max_0": 127.3912, "sent_len_max_1": 186.9112, "stdk": 0.0466, "stdq": 0.0428, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.0631, "doc_norm": 1.376, "encoder_q-embeddings": 1462.5146, "encoder_q-layer.0": 985.1544, "encoder_q-layer.1": 1035.7219, "encoder_q-layer.10": 1268.2205, "encoder_q-layer.11": 3254.5305, "encoder_q-layer.2": 1130.6705, "encoder_q-layer.3": 1154.2183, "encoder_q-layer.4": 1120.7539, "encoder_q-layer.5": 1092.0017, "encoder_q-layer.6": 1178.9135, "encoder_q-layer.7": 1288.6226, "encoder_q-layer.8": 1412.765, "encoder_q-layer.9": 1211.1521, "epoch": 0.23, "inbatch_neg_score": 0.2206, "inbatch_pos_score": 0.7305, "learning_rate": 3.561111111111111e-05, "loss": 4.0631, "norm_diff": 0.0678, "norm_loss": 0.0, "num_token_doc": 66.7737, "num_token_overlap": 11.6361, "num_token_query": 31.3108, "num_token_union": 65.1132, "num_word_context": 202.1112, "num_word_doc": 49.8654, "num_word_query": 23.2707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2265.6988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2198, "query_norm": 1.3081, "queue_k_norm": 1.3808, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3108, "sent_len_1": 66.7737, "sent_len_max_0": 127.49, "sent_len_max_1": 189.23, "stdk": 0.0466, "stdq": 0.0419, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 4.0554, "doc_norm": 1.3786, "encoder_q-embeddings": 2519.759, "encoder_q-layer.0": 1743.0167, "encoder_q-layer.1": 1837.2042, "encoder_q-layer.10": 1261.2638, "encoder_q-layer.11": 3139.7683, "encoder_q-layer.2": 2019.5254, "encoder_q-layer.3": 2016.0251, "encoder_q-layer.4": 2025.0834, "encoder_q-layer.5": 1770.1024, "encoder_q-layer.6": 1564.3925, "encoder_q-layer.7": 1516.8197, "encoder_q-layer.8": 1488.1976, "encoder_q-layer.9": 1213.5549, "epoch": 0.23, "inbatch_neg_score": 0.2181, "inbatch_pos_score": 0.7544, "learning_rate": 3.555555555555556e-05, "loss": 4.0554, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.7248, "num_token_overlap": 11.6022, "num_token_query": 31.2453, "num_token_union": 65.0647, "num_word_context": 202.3096, "num_word_doc": 49.7752, "num_word_query": 23.2172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2953.8902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2173, "query_norm": 1.3269, "queue_k_norm": 1.3786, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2453, "sent_len_1": 66.7248, "sent_len_max_0": 127.4587, "sent_len_max_1": 191.1738, "stdk": 0.0467, "stdq": 0.0425, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.0448, "doc_norm": 1.3823, "encoder_q-embeddings": 3483.8794, "encoder_q-layer.0": 2244.2917, "encoder_q-layer.1": 2576.0361, "encoder_q-layer.10": 1225.571, "encoder_q-layer.11": 3216.7471, "encoder_q-layer.2": 2748.856, "encoder_q-layer.3": 2895.8674, "encoder_q-layer.4": 2942.061, "encoder_q-layer.5": 2906.3154, "encoder_q-layer.6": 2563.7354, "encoder_q-layer.7": 2235.2151, "encoder_q-layer.8": 1665.5431, "encoder_q-layer.9": 1219.5142, "epoch": 0.23, "inbatch_neg_score": 0.226, "inbatch_pos_score": 0.7695, "learning_rate": 3.55e-05, "loss": 4.0448, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 11.7014, "num_token_query": 31.442, "num_token_union": 65.155, "num_word_context": 202.4768, "num_word_doc": 49.8451, "num_word_query": 23.356, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3911.1532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2256, "query_norm": 1.3374, "queue_k_norm": 1.3789, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.442, "sent_len_1": 66.8275, "sent_len_max_0": 127.5875, "sent_len_max_1": 191.7862, "stdk": 0.0468, "stdq": 0.0425, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 4.0632, "doc_norm": 1.3797, "encoder_q-embeddings": 5625.3789, "encoder_q-layer.0": 4352.9478, "encoder_q-layer.1": 5131.9712, "encoder_q-layer.10": 1473.2573, "encoder_q-layer.11": 3621.6545, "encoder_q-layer.2": 6491.9277, "encoder_q-layer.3": 6456.6069, "encoder_q-layer.4": 6765.9785, "encoder_q-layer.5": 6468.0112, "encoder_q-layer.6": 6370.271, "encoder_q-layer.7": 5085.3218, "encoder_q-layer.8": 3855.3889, "encoder_q-layer.9": 1503.228, "epoch": 0.24, "inbatch_neg_score": 0.2301, "inbatch_pos_score": 0.7642, "learning_rate": 3.5444444444444445e-05, "loss": 4.0632, "norm_diff": 0.0568, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 11.6717, "num_token_query": 31.4072, "num_token_union": 65.1479, "num_word_context": 201.9458, "num_word_doc": 49.8419, "num_word_query": 23.3173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7758.1576, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2292, "query_norm": 1.3229, "queue_k_norm": 1.3797, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4072, "sent_len_1": 66.8413, "sent_len_max_0": 127.5812, "sent_len_max_1": 191.765, "stdk": 0.0467, "stdq": 0.0417, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.0465, "doc_norm": 1.3716, "encoder_q-embeddings": 14922.6787, "encoder_q-layer.0": 10542.7021, "encoder_q-layer.1": 11069.418, "encoder_q-layer.10": 2702.8735, "encoder_q-layer.11": 6417.3574, "encoder_q-layer.2": 13525.2822, "encoder_q-layer.3": 13237.2803, "encoder_q-layer.4": 14235.0303, "encoder_q-layer.5": 12941.9629, "encoder_q-layer.6": 10197.5049, "encoder_q-layer.7": 9948.1133, "encoder_q-layer.8": 6811.1177, "encoder_q-layer.9": 2831.0754, "epoch": 0.24, "inbatch_neg_score": 0.2378, "inbatch_pos_score": 0.7739, "learning_rate": 3.538888888888889e-05, "loss": 4.0465, "norm_diff": 0.0183, "norm_loss": 0.0, "num_token_doc": 66.9417, "num_token_overlap": 11.6849, "num_token_query": 31.4207, "num_token_union": 65.3, "num_word_context": 202.6162, "num_word_doc": 49.9296, "num_word_query": 23.3147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16249.4041, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.238, "query_norm": 1.3577, "queue_k_norm": 1.3813, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4207, "sent_len_1": 66.9417, "sent_len_max_0": 127.5588, "sent_len_max_1": 189.1775, "stdk": 0.0464, "stdq": 0.043, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 4.0334, "doc_norm": 1.3803, "encoder_q-embeddings": 3662.929, "encoder_q-layer.0": 2561.0879, "encoder_q-layer.1": 2682.1274, "encoder_q-layer.10": 2609.9026, "encoder_q-layer.11": 6244.1328, "encoder_q-layer.2": 2705.4595, "encoder_q-layer.3": 2689.926, "encoder_q-layer.4": 2790.5398, "encoder_q-layer.5": 2690.0393, "encoder_q-layer.6": 2626.1611, "encoder_q-layer.7": 2735.542, "encoder_q-layer.8": 2794.697, "encoder_q-layer.9": 2426.9907, "epoch": 0.24, "inbatch_neg_score": 0.243, "inbatch_pos_score": 0.7842, "learning_rate": 3.5333333333333336e-05, "loss": 4.0334, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.6916, "num_token_overlap": 11.6567, "num_token_query": 31.3155, "num_token_union": 65.0437, "num_word_context": 202.1866, "num_word_doc": 49.7734, "num_word_query": 23.2388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4811.3182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2434, "query_norm": 1.325, "queue_k_norm": 1.3811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3155, "sent_len_1": 66.6916, "sent_len_max_0": 127.5187, "sent_len_max_1": 190.27, "stdk": 0.0467, "stdq": 0.0417, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0503, "doc_norm": 1.3855, "encoder_q-embeddings": 5945.3989, "encoder_q-layer.0": 4366.0024, "encoder_q-layer.1": 4585.3999, "encoder_q-layer.10": 2750.062, "encoder_q-layer.11": 6692.1426, "encoder_q-layer.2": 5436.8853, "encoder_q-layer.3": 5160.958, "encoder_q-layer.4": 4747.5278, "encoder_q-layer.5": 4976.1323, "encoder_q-layer.6": 4150.915, "encoder_q-layer.7": 3611.6084, "encoder_q-layer.8": 3382.02, "encoder_q-layer.9": 2629.1047, "epoch": 0.24, "inbatch_neg_score": 0.2426, "inbatch_pos_score": 0.7788, "learning_rate": 3.527777777777778e-05, "loss": 4.0503, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.7814, "num_token_overlap": 11.6629, "num_token_query": 31.4129, "num_token_union": 65.1722, "num_word_context": 202.4093, "num_word_doc": 49.824, "num_word_query": 23.3271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7144.3567, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2413, "query_norm": 1.3338, "queue_k_norm": 1.3837, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4129, "sent_len_1": 66.7814, "sent_len_max_0": 127.4625, "sent_len_max_1": 189.545, "stdk": 0.0469, "stdq": 0.0419, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0504, "doc_norm": 1.3818, "encoder_q-embeddings": 4768.5962, "encoder_q-layer.0": 3427.8423, "encoder_q-layer.1": 3595.1545, "encoder_q-layer.10": 2547.7222, "encoder_q-layer.11": 6618.168, "encoder_q-layer.2": 4073.8899, "encoder_q-layer.3": 4301.0332, "encoder_q-layer.4": 4444.5449, "encoder_q-layer.5": 4573.624, "encoder_q-layer.6": 4656.3384, "encoder_q-layer.7": 4233.4697, "encoder_q-layer.8": 3878.0903, "encoder_q-layer.9": 2568.2458, "epoch": 0.24, "inbatch_neg_score": 0.245, "inbatch_pos_score": 0.7881, "learning_rate": 3.522222222222222e-05, "loss": 4.0504, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.5252, "num_token_overlap": 11.6854, "num_token_query": 31.3291, "num_token_union": 64.9219, "num_word_context": 201.9561, "num_word_doc": 49.6303, "num_word_query": 23.254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6410.7212, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2454, "query_norm": 1.339, "queue_k_norm": 1.3803, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3291, "sent_len_1": 66.5252, "sent_len_max_0": 127.5863, "sent_len_max_1": 190.765, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.0492, "doc_norm": 1.3874, "encoder_q-embeddings": 5533.105, "encoder_q-layer.0": 3851.0737, "encoder_q-layer.1": 3665.178, "encoder_q-layer.10": 2700.8884, "encoder_q-layer.11": 6662.0615, "encoder_q-layer.2": 4081.5361, "encoder_q-layer.3": 4236.9033, "encoder_q-layer.4": 4371.2148, "encoder_q-layer.5": 3771.9692, "encoder_q-layer.6": 3709.5764, "encoder_q-layer.7": 3625.5901, "encoder_q-layer.8": 3316.7874, "encoder_q-layer.9": 2555.4668, "epoch": 0.24, "inbatch_neg_score": 0.2418, "inbatch_pos_score": 0.77, "learning_rate": 3.516666666666667e-05, "loss": 4.0492, "norm_diff": 0.0665, "norm_loss": 0.0, "num_token_doc": 66.7213, "num_token_overlap": 11.6403, "num_token_query": 31.3381, "num_token_union": 65.0731, "num_word_context": 202.3614, "num_word_doc": 49.7506, "num_word_query": 23.2765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6427.6169, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2413, "query_norm": 1.3209, "queue_k_norm": 1.3822, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3381, "sent_len_1": 66.7213, "sent_len_max_0": 127.6612, "sent_len_max_1": 190.61, "stdk": 0.047, "stdq": 0.0415, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.0332, "doc_norm": 1.3749, "encoder_q-embeddings": 3066.5471, "encoder_q-layer.0": 2118.5559, "encoder_q-layer.1": 2261.0261, "encoder_q-layer.10": 2508.0364, "encoder_q-layer.11": 6442.2534, "encoder_q-layer.2": 2411.0688, "encoder_q-layer.3": 2628.4363, "encoder_q-layer.4": 2721.3508, "encoder_q-layer.5": 2717.5955, "encoder_q-layer.6": 3211.4253, "encoder_q-layer.7": 3011.259, "encoder_q-layer.8": 2927.45, "encoder_q-layer.9": 2475.79, "epoch": 0.24, "inbatch_neg_score": 0.2272, "inbatch_pos_score": 0.7646, "learning_rate": 3.511111111111111e-05, "loss": 4.0332, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.9053, "num_token_overlap": 11.7116, "num_token_query": 31.4451, "num_token_union": 65.1789, "num_word_context": 202.2455, "num_word_doc": 49.8545, "num_word_query": 23.3328, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4704.2045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2273, "query_norm": 1.3309, "queue_k_norm": 1.3799, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4451, "sent_len_1": 66.9053, "sent_len_max_0": 127.505, "sent_len_max_1": 191.12, "stdk": 0.0465, "stdq": 0.0424, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0307, "doc_norm": 1.381, "encoder_q-embeddings": 4504.0244, "encoder_q-layer.0": 3036.4229, "encoder_q-layer.1": 3116.5376, "encoder_q-layer.10": 2756.0364, "encoder_q-layer.11": 6709.4951, "encoder_q-layer.2": 3611.7964, "encoder_q-layer.3": 3681.5442, "encoder_q-layer.4": 3763.2466, "encoder_q-layer.5": 3365.6411, "encoder_q-layer.6": 3292.2969, "encoder_q-layer.7": 3465.5989, "encoder_q-layer.8": 3214.1338, "encoder_q-layer.9": 2496.7727, "epoch": 0.24, "inbatch_neg_score": 0.2292, "inbatch_pos_score": 0.771, "learning_rate": 3.505555555555556e-05, "loss": 4.0307, "norm_diff": 0.0642, "norm_loss": 0.0, "num_token_doc": 66.6903, "num_token_overlap": 11.7226, "num_token_query": 31.5455, "num_token_union": 65.125, "num_word_context": 202.2375, "num_word_doc": 49.7341, "num_word_query": 23.441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5688.6306, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2269, "query_norm": 1.3168, "queue_k_norm": 1.3808, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5455, "sent_len_1": 66.6903, "sent_len_max_0": 127.245, "sent_len_max_1": 189.275, "stdk": 0.0467, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.05, "doc_norm": 1.3836, "encoder_q-embeddings": 16636.209, "encoder_q-layer.0": 12082.3418, "encoder_q-layer.1": 12152.1045, "encoder_q-layer.10": 2801.7627, "encoder_q-layer.11": 6422.5825, "encoder_q-layer.2": 14083.4258, "encoder_q-layer.3": 14067.5107, "encoder_q-layer.4": 13612.1787, "encoder_q-layer.5": 10493.7676, "encoder_q-layer.6": 10533.7256, "encoder_q-layer.7": 9082.625, "encoder_q-layer.8": 6386.9209, "encoder_q-layer.9": 3034.6885, "epoch": 0.24, "inbatch_neg_score": 0.2226, "inbatch_pos_score": 0.7656, "learning_rate": 3.5e-05, "loss": 4.05, "norm_diff": 0.0529, "norm_loss": 0.0, "num_token_doc": 66.6971, "num_token_overlap": 11.6649, "num_token_query": 31.3548, "num_token_union": 65.0523, "num_word_context": 202.0251, "num_word_doc": 49.7882, "num_word_query": 23.2846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16912.5641, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2222, "query_norm": 1.3307, "queue_k_norm": 1.3802, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3548, "sent_len_1": 66.6971, "sent_len_max_0": 127.4213, "sent_len_max_1": 188.5462, "stdk": 0.0469, "stdq": 0.0429, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.0217, "doc_norm": 1.3802, "encoder_q-embeddings": 3640.5767, "encoder_q-layer.0": 2539.873, "encoder_q-layer.1": 2556.5576, "encoder_q-layer.10": 2556.2678, "encoder_q-layer.11": 5998.5879, "encoder_q-layer.2": 2813.7637, "encoder_q-layer.3": 2771.5635, "encoder_q-layer.4": 2788.1057, "encoder_q-layer.5": 2771.5881, "encoder_q-layer.6": 2958.7893, "encoder_q-layer.7": 2908.9263, "encoder_q-layer.8": 2857.5996, "encoder_q-layer.9": 2371.6748, "epoch": 0.24, "inbatch_neg_score": 0.2223, "inbatch_pos_score": 0.748, "learning_rate": 3.4944444444444446e-05, "loss": 4.0217, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.9386, "num_token_overlap": 11.7238, "num_token_query": 31.4504, "num_token_union": 65.2258, "num_word_context": 202.6551, "num_word_doc": 49.9703, "num_word_query": 23.3725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4896.6383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2214, "query_norm": 1.3049, "queue_k_norm": 1.3808, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4504, "sent_len_1": 66.9386, "sent_len_max_0": 127.3912, "sent_len_max_1": 190.4288, "stdk": 0.0467, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0539, "doc_norm": 1.3848, "encoder_q-embeddings": 2702.3687, "encoder_q-layer.0": 1846.7882, "encoder_q-layer.1": 1910.9958, "encoder_q-layer.10": 2688.4507, "encoder_q-layer.11": 6432.0562, "encoder_q-layer.2": 2034.4493, "encoder_q-layer.3": 2140.4756, "encoder_q-layer.4": 2144.1985, "encoder_q-layer.5": 2189.0046, "encoder_q-layer.6": 2374.6155, "encoder_q-layer.7": 2620.6912, "encoder_q-layer.8": 2857.6816, "encoder_q-layer.9": 2481.978, "epoch": 0.24, "inbatch_neg_score": 0.2132, "inbatch_pos_score": 0.7476, "learning_rate": 3.4888888888888895e-05, "loss": 4.0539, "norm_diff": 0.0722, "norm_loss": 0.0, "num_token_doc": 66.9578, "num_token_overlap": 11.668, "num_token_query": 31.2671, "num_token_union": 65.1715, "num_word_context": 202.7398, "num_word_doc": 49.9852, "num_word_query": 23.2139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4377.5125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2129, "query_norm": 1.3127, "queue_k_norm": 1.3814, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2671, "sent_len_1": 66.9578, "sent_len_max_0": 127.4463, "sent_len_max_1": 189.9538, "stdk": 0.0469, "stdq": 0.0427, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.0281, "doc_norm": 1.3846, "encoder_q-embeddings": 2532.7737, "encoder_q-layer.0": 1630.9412, "encoder_q-layer.1": 1711.7201, "encoder_q-layer.10": 2614.3628, "encoder_q-layer.11": 6503.7705, "encoder_q-layer.2": 1910.075, "encoder_q-layer.3": 1995.8866, "encoder_q-layer.4": 2080.8481, "encoder_q-layer.5": 2112.5793, "encoder_q-layer.6": 2225.5176, "encoder_q-layer.7": 2465.0852, "encoder_q-layer.8": 2732.001, "encoder_q-layer.9": 2441.5378, "epoch": 0.24, "inbatch_neg_score": 0.2001, "inbatch_pos_score": 0.7373, "learning_rate": 3.483333333333334e-05, "loss": 4.0281, "norm_diff": 0.0938, "norm_loss": 0.0, "num_token_doc": 66.6432, "num_token_overlap": 11.6689, "num_token_query": 31.364, "num_token_union": 65.0215, "num_word_context": 202.1927, "num_word_doc": 49.7294, "num_word_query": 23.302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4279.1966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2007, "query_norm": 1.2908, "queue_k_norm": 1.3797, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.364, "sent_len_1": 66.6432, "sent_len_max_0": 127.4387, "sent_len_max_1": 191.1, "stdk": 0.0469, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.0296, "doc_norm": 1.3794, "encoder_q-embeddings": 3830.2451, "encoder_q-layer.0": 2810.6777, "encoder_q-layer.1": 2891.9382, "encoder_q-layer.10": 2368.9915, "encoder_q-layer.11": 5878.168, "encoder_q-layer.2": 3379.2866, "encoder_q-layer.3": 3801.2922, "encoder_q-layer.4": 3735.5588, "encoder_q-layer.5": 3318.8193, "encoder_q-layer.6": 3209.7434, "encoder_q-layer.7": 2946.467, "encoder_q-layer.8": 2885.1101, "encoder_q-layer.9": 2356.2163, "epoch": 0.24, "inbatch_neg_score": 0.2062, "inbatch_pos_score": 0.7251, "learning_rate": 3.477777777777778e-05, "loss": 4.0296, "norm_diff": 0.0924, "norm_loss": 0.0, "num_token_doc": 66.9329, "num_token_overlap": 11.6798, "num_token_query": 31.4226, "num_token_union": 65.2588, "num_word_context": 202.7633, "num_word_doc": 49.952, "num_word_query": 23.3482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5239.2929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2068, "query_norm": 1.287, "queue_k_norm": 1.3792, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4226, "sent_len_1": 66.9329, "sent_len_max_0": 127.5375, "sent_len_max_1": 190.3113, "stdk": 0.0468, "stdq": 0.0416, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.0274, "doc_norm": 1.377, "encoder_q-embeddings": 2873.8071, "encoder_q-layer.0": 1946.5812, "encoder_q-layer.1": 1970.4911, "encoder_q-layer.10": 2561.1833, "encoder_q-layer.11": 6410.6597, "encoder_q-layer.2": 2224.6584, "encoder_q-layer.3": 2272.0251, "encoder_q-layer.4": 2309.7898, "encoder_q-layer.5": 2260.8276, "encoder_q-layer.6": 2511.6477, "encoder_q-layer.7": 2787.8203, "encoder_q-layer.8": 3009.7683, "encoder_q-layer.9": 2476.7473, "epoch": 0.24, "inbatch_neg_score": 0.1977, "inbatch_pos_score": 0.7236, "learning_rate": 3.472222222222222e-05, "loss": 4.0274, "norm_diff": 0.0895, "norm_loss": 0.0, "num_token_doc": 66.6879, "num_token_overlap": 11.6266, "num_token_query": 31.27, "num_token_union": 65.0768, "num_word_context": 202.0312, "num_word_doc": 49.8098, "num_word_query": 23.2245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4514.7504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1976, "query_norm": 1.2875, "queue_k_norm": 1.3804, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.27, "sent_len_1": 66.6879, "sent_len_max_0": 127.3538, "sent_len_max_1": 187.425, "stdk": 0.0467, "stdq": 0.0415, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.0237, "doc_norm": 1.3735, "encoder_q-embeddings": 10002.623, "encoder_q-layer.0": 6863.9604, "encoder_q-layer.1": 7183.6309, "encoder_q-layer.10": 2962.9224, "encoder_q-layer.11": 7047.0059, "encoder_q-layer.2": 8758.5459, "encoder_q-layer.3": 9461.627, "encoder_q-layer.4": 9700.0449, "encoder_q-layer.5": 9696.3682, "encoder_q-layer.6": 9592.8984, "encoder_q-layer.7": 10641.124, "encoder_q-layer.8": 5876.1743, "encoder_q-layer.9": 2777.3145, "epoch": 0.24, "inbatch_neg_score": 0.1942, "inbatch_pos_score": 0.7119, "learning_rate": 3.466666666666667e-05, "loss": 4.0237, "norm_diff": 0.0735, "norm_loss": 0.0, "num_token_doc": 66.6966, "num_token_overlap": 11.6675, "num_token_query": 31.3744, "num_token_union": 65.0877, "num_word_context": 202.2765, "num_word_doc": 49.7557, "num_word_query": 23.3074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12115.1955, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1935, "query_norm": 1.3, "queue_k_norm": 1.3774, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3744, "sent_len_1": 66.6966, "sent_len_max_0": 127.3825, "sent_len_max_1": 190.4638, "stdk": 0.0465, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.0149, "doc_norm": 1.3776, "encoder_q-embeddings": 3366.1013, "encoder_q-layer.0": 2183.8887, "encoder_q-layer.1": 2304.6013, "encoder_q-layer.10": 2711.1035, "encoder_q-layer.11": 6519.5093, "encoder_q-layer.2": 2594.8406, "encoder_q-layer.3": 2731.2722, "encoder_q-layer.4": 2796.2627, "encoder_q-layer.5": 2792.2537, "encoder_q-layer.6": 2801.9692, "encoder_q-layer.7": 3104.7034, "encoder_q-layer.8": 3185.9146, "encoder_q-layer.9": 2785.9712, "epoch": 0.25, "inbatch_neg_score": 0.1957, "inbatch_pos_score": 0.7246, "learning_rate": 3.4611111111111114e-05, "loss": 4.0149, "norm_diff": 0.0609, "norm_loss": 0.0, "num_token_doc": 66.7088, "num_token_overlap": 11.713, "num_token_query": 31.4952, "num_token_union": 65.116, "num_word_context": 202.1543, "num_word_doc": 49.7652, "num_word_query": 23.4072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4915.5758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1949, "query_norm": 1.3167, "queue_k_norm": 1.3766, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4952, "sent_len_1": 66.7088, "sent_len_max_0": 127.555, "sent_len_max_1": 190.51, "stdk": 0.0468, "stdq": 0.0427, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 4.0165, "doc_norm": 1.3757, "encoder_q-embeddings": 2990.7693, "encoder_q-layer.0": 2015.9636, "encoder_q-layer.1": 2094.1663, "encoder_q-layer.10": 2501.2979, "encoder_q-layer.11": 6385.0703, "encoder_q-layer.2": 2423.4919, "encoder_q-layer.3": 2661.2468, "encoder_q-layer.4": 2857.6201, "encoder_q-layer.5": 2772.4163, "encoder_q-layer.6": 3009.9331, "encoder_q-layer.7": 3258.7344, "encoder_q-layer.8": 3523.8159, "encoder_q-layer.9": 2803.1431, "epoch": 0.25, "inbatch_neg_score": 0.1942, "inbatch_pos_score": 0.7383, "learning_rate": 3.4555555555555556e-05, "loss": 4.0165, "norm_diff": 0.0721, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 11.656, "num_token_query": 31.3969, "num_token_union": 65.1662, "num_word_context": 202.1595, "num_word_doc": 49.8558, "num_word_query": 23.3237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4944.0423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1947, "query_norm": 1.3036, "queue_k_norm": 1.3749, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3969, "sent_len_1": 66.7796, "sent_len_max_0": 127.5162, "sent_len_max_1": 189.6788, "stdk": 0.0468, "stdq": 0.0423, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.0335, "doc_norm": 1.3717, "encoder_q-embeddings": 7849.2998, "encoder_q-layer.0": 5251.8818, "encoder_q-layer.1": 5645.0303, "encoder_q-layer.10": 2908.0227, "encoder_q-layer.11": 6788.7559, "encoder_q-layer.2": 7064.4438, "encoder_q-layer.3": 7507.3628, "encoder_q-layer.4": 7502.0122, "encoder_q-layer.5": 6644.6362, "encoder_q-layer.6": 5573.0146, "encoder_q-layer.7": 4055.043, "encoder_q-layer.8": 3758.6904, "encoder_q-layer.9": 2724.9604, "epoch": 0.25, "inbatch_neg_score": 0.1951, "inbatch_pos_score": 0.7319, "learning_rate": 3.45e-05, "loss": 4.0335, "norm_diff": 0.0402, "norm_loss": 0.0, "num_token_doc": 67.0514, "num_token_overlap": 11.7104, "num_token_query": 31.4292, "num_token_union": 65.2938, "num_word_context": 202.552, "num_word_doc": 50.0439, "num_word_query": 23.3549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8878.8964, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1941, "query_norm": 1.3315, "queue_k_norm": 1.3775, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4292, "sent_len_1": 67.0514, "sent_len_max_0": 127.5025, "sent_len_max_1": 188.3675, "stdk": 0.0466, "stdq": 0.0434, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.0173, "doc_norm": 1.3719, "encoder_q-embeddings": 3554.1392, "encoder_q-layer.0": 2552.6831, "encoder_q-layer.1": 2767.9121, "encoder_q-layer.10": 2513.7007, "encoder_q-layer.11": 6188.4126, "encoder_q-layer.2": 3042.689, "encoder_q-layer.3": 3012.1489, "encoder_q-layer.4": 2901.4478, "encoder_q-layer.5": 2555.6248, "encoder_q-layer.6": 2668.3579, "encoder_q-layer.7": 2729.8333, "encoder_q-layer.8": 2818.583, "encoder_q-layer.9": 2415.9407, "epoch": 0.25, "inbatch_neg_score": 0.1969, "inbatch_pos_score": 0.7319, "learning_rate": 3.444444444444445e-05, "loss": 4.0173, "norm_diff": 0.0678, "norm_loss": 0.0, "num_token_doc": 66.7821, "num_token_overlap": 11.7098, "num_token_query": 31.4754, "num_token_union": 65.1326, "num_word_context": 202.2473, "num_word_doc": 49.79, "num_word_query": 23.3812, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4915.3294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.196, "query_norm": 1.3042, "queue_k_norm": 1.3753, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4754, "sent_len_1": 66.7821, "sent_len_max_0": 127.5425, "sent_len_max_1": 190.5075, "stdk": 0.0467, "stdq": 0.0424, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 4.0296, "doc_norm": 1.3798, "encoder_q-embeddings": 3714.5447, "encoder_q-layer.0": 2562.25, "encoder_q-layer.1": 2688.5713, "encoder_q-layer.10": 2565.0989, "encoder_q-layer.11": 6484.3403, "encoder_q-layer.2": 2837.1963, "encoder_q-layer.3": 2832.3838, "encoder_q-layer.4": 2753.7139, "encoder_q-layer.5": 2644.0508, "encoder_q-layer.6": 2683.6294, "encoder_q-layer.7": 2715.333, "encoder_q-layer.8": 2824.3044, "encoder_q-layer.9": 2415.8997, "epoch": 0.25, "inbatch_neg_score": 0.1958, "inbatch_pos_score": 0.7549, "learning_rate": 3.438888888888889e-05, "loss": 4.0296, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.5257, "num_token_overlap": 11.629, "num_token_query": 31.3759, "num_token_union": 65.0282, "num_word_context": 202.1106, "num_word_doc": 49.6251, "num_word_query": 23.3194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4922.5481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.197, "query_norm": 1.3187, "queue_k_norm": 1.374, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3759, "sent_len_1": 66.5257, "sent_len_max_0": 127.43, "sent_len_max_1": 190.4638, "stdk": 0.047, "stdq": 0.0428, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.0069, "doc_norm": 1.3804, "encoder_q-embeddings": 2743.4897, "encoder_q-layer.0": 1855.5746, "encoder_q-layer.1": 1872.7592, "encoder_q-layer.10": 2357.4348, "encoder_q-layer.11": 6201.9634, "encoder_q-layer.2": 2050.4382, "encoder_q-layer.3": 2114.6841, "encoder_q-layer.4": 2179.5193, "encoder_q-layer.5": 2158.4771, "encoder_q-layer.6": 2258.3596, "encoder_q-layer.7": 2418.4297, "encoder_q-layer.8": 2590.7625, "encoder_q-layer.9": 2189.8328, "epoch": 0.25, "inbatch_neg_score": 0.2, "inbatch_pos_score": 0.7422, "learning_rate": 3.433333333333333e-05, "loss": 4.0069, "norm_diff": 0.079, "norm_loss": 0.0, "num_token_doc": 66.764, "num_token_overlap": 11.7157, "num_token_query": 31.5251, "num_token_union": 65.1681, "num_word_context": 202.2686, "num_word_doc": 49.8027, "num_word_query": 23.4302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4265.7565, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2002, "query_norm": 1.3014, "queue_k_norm": 1.3736, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5251, "sent_len_1": 66.764, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.645, "stdk": 0.0471, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9938, "doc_norm": 1.3737, "encoder_q-embeddings": 29531.3398, "encoder_q-layer.0": 22028.6699, "encoder_q-layer.1": 23843.6543, "encoder_q-layer.10": 5492.6758, "encoder_q-layer.11": 12422.9521, "encoder_q-layer.2": 27669.9434, "encoder_q-layer.3": 27747.9688, "encoder_q-layer.4": 28715.5898, "encoder_q-layer.5": 27209.082, "encoder_q-layer.6": 21138.0098, "encoder_q-layer.7": 24105.5137, "encoder_q-layer.8": 15911.0938, "encoder_q-layer.9": 6263.4678, "epoch": 0.25, "inbatch_neg_score": 0.2065, "inbatch_pos_score": 0.7441, "learning_rate": 3.427777777777778e-05, "loss": 3.9938, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.6705, "num_token_overlap": 11.7076, "num_token_query": 31.4929, "num_token_union": 65.0598, "num_word_context": 202.6957, "num_word_doc": 49.7237, "num_word_query": 23.3992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34006.9834, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.2064, "query_norm": 1.3036, "queue_k_norm": 1.3719, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4929, "sent_len_1": 66.6705, "sent_len_max_0": 127.6112, "sent_len_max_1": 191.6887, "stdk": 0.0468, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9995, "doc_norm": 1.3758, "encoder_q-embeddings": 7189.8735, "encoder_q-layer.0": 4905.502, "encoder_q-layer.1": 5205.1392, "encoder_q-layer.10": 4893.647, "encoder_q-layer.11": 12574.25, "encoder_q-layer.2": 6456.6865, "encoder_q-layer.3": 6923.2563, "encoder_q-layer.4": 7087.8184, "encoder_q-layer.5": 6606.1714, "encoder_q-layer.6": 6623.6665, "encoder_q-layer.7": 6443.6514, "encoder_q-layer.8": 6486.6602, "encoder_q-layer.9": 5168.2847, "epoch": 0.25, "inbatch_neg_score": 0.218, "inbatch_pos_score": 0.7466, "learning_rate": 3.4222222222222224e-05, "loss": 3.9995, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.7579, "num_token_overlap": 11.6844, "num_token_query": 31.5773, "num_token_union": 65.2296, "num_word_context": 202.3496, "num_word_doc": 49.8675, "num_word_query": 23.4615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10445.231, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2178, "query_norm": 1.3138, "queue_k_norm": 1.3728, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5773, "sent_len_1": 66.7579, "sent_len_max_0": 127.5613, "sent_len_max_1": 188.6188, "stdk": 0.0469, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.0025, "doc_norm": 1.364, "encoder_q-embeddings": 6215.1606, "encoder_q-layer.0": 4242.7871, "encoder_q-layer.1": 4479.7261, "encoder_q-layer.10": 5200.0264, "encoder_q-layer.11": 13616.9531, "encoder_q-layer.2": 4885.9429, "encoder_q-layer.3": 4789.1211, "encoder_q-layer.4": 4960.2773, "encoder_q-layer.5": 4893.9771, "encoder_q-layer.6": 5438.5703, "encoder_q-layer.7": 5711.6245, "encoder_q-layer.8": 6340.3408, "encoder_q-layer.9": 5073.168, "epoch": 0.25, "inbatch_neg_score": 0.217, "inbatch_pos_score": 0.7261, "learning_rate": 3.4166666666666666e-05, "loss": 4.0025, "norm_diff": 0.0587, "norm_loss": 0.0, "num_token_doc": 66.833, "num_token_overlap": 11.7401, "num_token_query": 31.5462, "num_token_union": 65.1756, "num_word_context": 202.6483, "num_word_doc": 49.8405, "num_word_query": 23.441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9665.4349, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2156, "query_norm": 1.3053, "queue_k_norm": 1.3747, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5462, "sent_len_1": 66.833, "sent_len_max_0": 127.5025, "sent_len_max_1": 191.6275, "stdk": 0.0464, "stdq": 0.0418, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0126, "doc_norm": 1.3715, "encoder_q-embeddings": 5447.2051, "encoder_q-layer.0": 3592.5999, "encoder_q-layer.1": 3587.0928, "encoder_q-layer.10": 5091.2637, "encoder_q-layer.11": 12313.6699, "encoder_q-layer.2": 4021.7083, "encoder_q-layer.3": 4165.7783, "encoder_q-layer.4": 4436.8286, "encoder_q-layer.5": 4199.4253, "encoder_q-layer.6": 4597.6382, "encoder_q-layer.7": 5041.2979, "encoder_q-layer.8": 5486.521, "encoder_q-layer.9": 4692.4761, "epoch": 0.25, "inbatch_neg_score": 0.219, "inbatch_pos_score": 0.7476, "learning_rate": 3.411111111111111e-05, "loss": 4.0126, "norm_diff": 0.0653, "norm_loss": 0.0, "num_token_doc": 66.8184, "num_token_overlap": 11.6968, "num_token_query": 31.5084, "num_token_union": 65.1958, "num_word_context": 202.5188, "num_word_doc": 49.822, "num_word_query": 23.4189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8626.7084, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2188, "query_norm": 1.3062, "queue_k_norm": 1.3773, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5084, "sent_len_1": 66.8184, "sent_len_max_0": 127.645, "sent_len_max_1": 191.335, "stdk": 0.0467, "stdq": 0.0418, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.0117, "doc_norm": 1.366, "encoder_q-embeddings": 6560.6216, "encoder_q-layer.0": 4607.207, "encoder_q-layer.1": 4794.3857, "encoder_q-layer.10": 4880.4604, "encoder_q-layer.11": 12371.4258, "encoder_q-layer.2": 5197.7222, "encoder_q-layer.3": 5429.2563, "encoder_q-layer.4": 5479.8901, "encoder_q-layer.5": 5124.5918, "encoder_q-layer.6": 6011.3008, "encoder_q-layer.7": 6405.333, "encoder_q-layer.8": 6285.6167, "encoder_q-layer.9": 4825.9629, "epoch": 0.25, "inbatch_neg_score": 0.2234, "inbatch_pos_score": 0.7451, "learning_rate": 3.405555555555556e-05, "loss": 4.0117, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.6981, "num_token_overlap": 11.5952, "num_token_query": 31.2831, "num_token_union": 65.0778, "num_word_context": 202.2062, "num_word_doc": 49.6998, "num_word_query": 23.2169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9657.3051, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2235, "query_norm": 1.3166, "queue_k_norm": 1.3729, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2831, "sent_len_1": 66.6981, "sent_len_max_0": 127.2512, "sent_len_max_1": 192.755, "stdk": 0.0465, "stdq": 0.042, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.0104, "doc_norm": 1.3704, "encoder_q-embeddings": 5188.7842, "encoder_q-layer.0": 3480.7808, "encoder_q-layer.1": 3663.5237, "encoder_q-layer.10": 5689.543, "encoder_q-layer.11": 13236.8047, "encoder_q-layer.2": 4158.2764, "encoder_q-layer.3": 4211.1929, "encoder_q-layer.4": 4213.6987, "encoder_q-layer.5": 4174.5825, "encoder_q-layer.6": 4566.8271, "encoder_q-layer.7": 4873.9561, "encoder_q-layer.8": 5403.2363, "encoder_q-layer.9": 5019.6611, "epoch": 0.25, "inbatch_neg_score": 0.2223, "inbatch_pos_score": 0.7861, "learning_rate": 3.4000000000000007e-05, "loss": 4.0104, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.7046, "num_token_overlap": 11.6775, "num_token_query": 31.359, "num_token_union": 65.062, "num_word_context": 202.3644, "num_word_doc": 49.7932, "num_word_query": 23.3048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8610.7664, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2224, "query_norm": 1.337, "queue_k_norm": 1.3757, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.359, "sent_len_1": 66.7046, "sent_len_max_0": 127.4262, "sent_len_max_1": 189.4762, "stdk": 0.0467, "stdq": 0.0431, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9985, "doc_norm": 1.3927, "encoder_q-embeddings": 28368.3652, "encoder_q-layer.0": 19887.3828, "encoder_q-layer.1": 23013.4258, "encoder_q-layer.10": 4804.8896, "encoder_q-layer.11": 12493.0195, "encoder_q-layer.2": 26681.3848, "encoder_q-layer.3": 26117.5273, "encoder_q-layer.4": 22735.7129, "encoder_q-layer.5": 21080.8848, "encoder_q-layer.6": 17773.2461, "encoder_q-layer.7": 12045.5146, "encoder_q-layer.8": 8415.1787, "encoder_q-layer.9": 4990.4209, "epoch": 0.25, "inbatch_neg_score": 0.2227, "inbatch_pos_score": 0.7773, "learning_rate": 3.394444444444444e-05, "loss": 3.9985, "norm_diff": 0.0699, "norm_loss": 0.0, "num_token_doc": 67.1489, "num_token_overlap": 11.7324, "num_token_query": 31.4433, "num_token_union": 65.3351, "num_word_context": 202.5512, "num_word_doc": 50.0176, "num_word_query": 23.3554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 29335.4754, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.2244, "query_norm": 1.3228, "queue_k_norm": 1.3762, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4433, "sent_len_1": 67.1489, "sent_len_max_0": 127.5288, "sent_len_max_1": 191.3725, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.007, "doc_norm": 1.3738, "encoder_q-embeddings": 16416.8418, "encoder_q-layer.0": 12907.6172, "encoder_q-layer.1": 14207.8232, "encoder_q-layer.10": 5568.7266, "encoder_q-layer.11": 12579.2822, "encoder_q-layer.2": 15165.5654, "encoder_q-layer.3": 14104.1182, "encoder_q-layer.4": 13421.6729, "encoder_q-layer.5": 13715.1504, "encoder_q-layer.6": 15267.0303, "encoder_q-layer.7": 13404.4082, "encoder_q-layer.8": 11277.3926, "encoder_q-layer.9": 6340.4722, "epoch": 0.25, "inbatch_neg_score": 0.2277, "inbatch_pos_score": 0.7617, "learning_rate": 3.388888888888889e-05, "loss": 4.007, "norm_diff": 0.0486, "norm_loss": 0.0, "num_token_doc": 66.9063, "num_token_overlap": 11.6418, "num_token_query": 31.3505, "num_token_union": 65.2012, "num_word_context": 202.3966, "num_word_doc": 49.9675, "num_word_query": 23.2707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19658.0586, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2267, "query_norm": 1.328, "queue_k_norm": 1.3737, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3505, "sent_len_1": 66.9063, "sent_len_max_0": 127.55, "sent_len_max_1": 187.5, "stdk": 0.0467, "stdq": 0.0428, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9881, "doc_norm": 1.376, "encoder_q-embeddings": 10178.2275, "encoder_q-layer.0": 6990.0928, "encoder_q-layer.1": 7405.2114, "encoder_q-layer.10": 2645.9785, "encoder_q-layer.11": 6327.5034, "encoder_q-layer.2": 7939.3467, "encoder_q-layer.3": 8419.1123, "encoder_q-layer.4": 8760.8467, "encoder_q-layer.5": 8783.4023, "encoder_q-layer.6": 7593.7012, "encoder_q-layer.7": 6133.6372, "encoder_q-layer.8": 4177.3589, "encoder_q-layer.9": 2661.8967, "epoch": 0.25, "inbatch_neg_score": 0.2327, "inbatch_pos_score": 0.7715, "learning_rate": 3.3833333333333334e-05, "loss": 3.9881, "norm_diff": 0.0464, "norm_loss": 0.0, "num_token_doc": 66.8588, "num_token_overlap": 11.7004, "num_token_query": 31.3511, "num_token_union": 65.1309, "num_word_context": 201.8306, "num_word_doc": 49.8226, "num_word_query": 23.2725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10918.2165, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2332, "query_norm": 1.3295, "queue_k_norm": 1.3754, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3511, "sent_len_1": 66.8588, "sent_len_max_0": 127.3875, "sent_len_max_1": 191.4288, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9982, "doc_norm": 1.3799, "encoder_q-embeddings": 3620.2656, "encoder_q-layer.0": 2515.522, "encoder_q-layer.1": 2832.5916, "encoder_q-layer.10": 2574.4189, "encoder_q-layer.11": 6200.1987, "encoder_q-layer.2": 3279.4556, "encoder_q-layer.3": 3318.9885, "encoder_q-layer.4": 3573.7139, "encoder_q-layer.5": 2947.114, "encoder_q-layer.6": 2751.7666, "encoder_q-layer.7": 2941.4365, "encoder_q-layer.8": 3056.4556, "encoder_q-layer.9": 2439.9209, "epoch": 0.26, "inbatch_neg_score": 0.2318, "inbatch_pos_score": 0.7778, "learning_rate": 3.377777777777778e-05, "loss": 3.9982, "norm_diff": 0.0579, "norm_loss": 0.0, "num_token_doc": 66.6267, "num_token_overlap": 11.6476, "num_token_query": 31.2702, "num_token_union": 64.9485, "num_word_context": 201.9074, "num_word_doc": 49.6781, "num_word_query": 23.2196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5090.3906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.322, "queue_k_norm": 1.3739, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2702, "sent_len_1": 66.6267, "sent_len_max_0": 127.4813, "sent_len_max_1": 191.3212, "stdk": 0.0469, "stdq": 0.0422, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.999, "doc_norm": 1.3851, "encoder_q-embeddings": 3148.1438, "encoder_q-layer.0": 2179.4451, "encoder_q-layer.1": 2287.5469, "encoder_q-layer.10": 2412.6118, "encoder_q-layer.11": 6184.6538, "encoder_q-layer.2": 2479.5093, "encoder_q-layer.3": 2560.2371, "encoder_q-layer.4": 2463.7971, "encoder_q-layer.5": 2277.4326, "encoder_q-layer.6": 2161.752, "encoder_q-layer.7": 2314.3948, "encoder_q-layer.8": 2624.1233, "encoder_q-layer.9": 2257.4902, "epoch": 0.26, "inbatch_neg_score": 0.2306, "inbatch_pos_score": 0.7676, "learning_rate": 3.3722222222222225e-05, "loss": 3.999, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.7297, "num_token_overlap": 11.6668, "num_token_query": 31.4151, "num_token_union": 65.1048, "num_word_context": 202.1761, "num_word_doc": 49.7826, "num_word_query": 23.3312, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4525.3161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2306, "query_norm": 1.319, "queue_k_norm": 1.3748, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4151, "sent_len_1": 66.7297, "sent_len_max_0": 127.4725, "sent_len_max_1": 188.795, "stdk": 0.0471, "stdq": 0.0423, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.0134, "doc_norm": 1.377, "encoder_q-embeddings": 2508.2825, "encoder_q-layer.0": 1680.2284, "encoder_q-layer.1": 1799.193, "encoder_q-layer.10": 2551.5342, "encoder_q-layer.11": 6577.1294, "encoder_q-layer.2": 1941.5476, "encoder_q-layer.3": 2119.4497, "encoder_q-layer.4": 2120.4036, "encoder_q-layer.5": 2018.0483, "encoder_q-layer.6": 2187.5142, "encoder_q-layer.7": 2346.4724, "encoder_q-layer.8": 2610.4795, "encoder_q-layer.9": 2335.2092, "epoch": 0.26, "inbatch_neg_score": 0.2295, "inbatch_pos_score": 0.7637, "learning_rate": 3.366666666666667e-05, "loss": 4.0134, "norm_diff": 0.064, "norm_loss": 0.0, "num_token_doc": 66.6271, "num_token_overlap": 11.68, "num_token_query": 31.3992, "num_token_union": 65.0929, "num_word_context": 202.3741, "num_word_doc": 49.7496, "num_word_query": 23.3236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4274.711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2294, "query_norm": 1.3129, "queue_k_norm": 1.376, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3992, "sent_len_1": 66.6271, "sent_len_max_0": 127.4513, "sent_len_max_1": 187.6612, "stdk": 0.0468, "stdq": 0.042, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9929, "doc_norm": 1.3799, "encoder_q-embeddings": 5953.4463, "encoder_q-layer.0": 3937.4014, "encoder_q-layer.1": 4159.4951, "encoder_q-layer.10": 2736.6287, "encoder_q-layer.11": 6156.6738, "encoder_q-layer.2": 4859.0396, "encoder_q-layer.3": 4949.1919, "encoder_q-layer.4": 5085.3667, "encoder_q-layer.5": 5086.9849, "encoder_q-layer.6": 4496.6582, "encoder_q-layer.7": 3921.4644, "encoder_q-layer.8": 3467.7373, "encoder_q-layer.9": 2470.1489, "epoch": 0.26, "inbatch_neg_score": 0.2222, "inbatch_pos_score": 0.7651, "learning_rate": 3.3611111111111116e-05, "loss": 3.9929, "norm_diff": 0.0692, "norm_loss": 0.0, "num_token_doc": 66.9451, "num_token_overlap": 11.6663, "num_token_query": 31.3901, "num_token_union": 65.2295, "num_word_context": 202.5971, "num_word_doc": 49.9729, "num_word_query": 23.3221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6875.5824, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2223, "query_norm": 1.3107, "queue_k_norm": 1.3752, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3901, "sent_len_1": 66.9451, "sent_len_max_0": 127.445, "sent_len_max_1": 191.2988, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 3.988, "doc_norm": 1.3696, "encoder_q-embeddings": 3304.4148, "encoder_q-layer.0": 2172.1587, "encoder_q-layer.1": 2395.0283, "encoder_q-layer.10": 2869.6992, "encoder_q-layer.11": 6687.8501, "encoder_q-layer.2": 2845.7124, "encoder_q-layer.3": 3019.5759, "encoder_q-layer.4": 3184.1958, "encoder_q-layer.5": 3085.6648, "encoder_q-layer.6": 3193.0132, "encoder_q-layer.7": 3071.0703, "encoder_q-layer.8": 3226.2942, "encoder_q-layer.9": 2760.3442, "epoch": 0.26, "inbatch_neg_score": 0.218, "inbatch_pos_score": 0.7432, "learning_rate": 3.355555555555556e-05, "loss": 3.988, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.5189, "num_token_overlap": 11.6651, "num_token_query": 31.3969, "num_token_union": 64.9417, "num_word_context": 201.6925, "num_word_doc": 49.6554, "num_word_query": 23.3036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5103.0401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2179, "query_norm": 1.32, "queue_k_norm": 1.3759, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3969, "sent_len_1": 66.5189, "sent_len_max_0": 127.54, "sent_len_max_1": 189.7663, "stdk": 0.0465, "stdq": 0.0427, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9964, "doc_norm": 1.3808, "encoder_q-embeddings": 3153.6233, "encoder_q-layer.0": 2211.1057, "encoder_q-layer.1": 2472.9976, "encoder_q-layer.10": 2742.717, "encoder_q-layer.11": 6941.231, "encoder_q-layer.2": 2524.7075, "encoder_q-layer.3": 2584.157, "encoder_q-layer.4": 2502.3879, "encoder_q-layer.5": 2625.1963, "encoder_q-layer.6": 2728.0322, "encoder_q-layer.7": 2942.9905, "encoder_q-layer.8": 3131.635, "encoder_q-layer.9": 2518.2703, "epoch": 0.26, "inbatch_neg_score": 0.2201, "inbatch_pos_score": 0.7568, "learning_rate": 3.35e-05, "loss": 3.9964, "norm_diff": 0.0722, "norm_loss": 0.0, "num_token_doc": 66.6161, "num_token_overlap": 11.6238, "num_token_query": 31.2915, "num_token_union": 65.0384, "num_word_context": 202.0934, "num_word_doc": 49.7307, "num_word_query": 23.2573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4937.902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2213, "query_norm": 1.3086, "queue_k_norm": 1.3774, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2915, "sent_len_1": 66.6161, "sent_len_max_0": 127.5763, "sent_len_max_1": 187.9638, "stdk": 0.0469, "stdq": 0.0422, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9944, "doc_norm": 1.3758, "encoder_q-embeddings": 2622.2878, "encoder_q-layer.0": 1777.5354, "encoder_q-layer.1": 1801.2048, "encoder_q-layer.10": 2529.3794, "encoder_q-layer.11": 6552.9692, "encoder_q-layer.2": 2078.0461, "encoder_q-layer.3": 2082.0754, "encoder_q-layer.4": 2155.394, "encoder_q-layer.5": 2041.1752, "encoder_q-layer.6": 2251.7275, "encoder_q-layer.7": 2452.7642, "encoder_q-layer.8": 2715.0359, "encoder_q-layer.9": 2440.3042, "epoch": 0.26, "inbatch_neg_score": 0.2124, "inbatch_pos_score": 0.7412, "learning_rate": 3.3444444444444443e-05, "loss": 3.9944, "norm_diff": 0.0767, "norm_loss": 0.0, "num_token_doc": 66.8588, "num_token_overlap": 11.6267, "num_token_query": 31.341, "num_token_union": 65.2046, "num_word_context": 202.3913, "num_word_doc": 49.8738, "num_word_query": 23.2688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4374.8741, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2113, "query_norm": 1.299, "queue_k_norm": 1.3764, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.341, "sent_len_1": 66.8588, "sent_len_max_0": 127.59, "sent_len_max_1": 191.4087, "stdk": 0.0467, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9757, "doc_norm": 1.3741, "encoder_q-embeddings": 3832.9382, "encoder_q-layer.0": 2602.8408, "encoder_q-layer.1": 2741.4473, "encoder_q-layer.10": 2559.239, "encoder_q-layer.11": 6467.2305, "encoder_q-layer.2": 3181.4854, "encoder_q-layer.3": 3071.4241, "encoder_q-layer.4": 2934.4446, "encoder_q-layer.5": 2436.8894, "encoder_q-layer.6": 2536.3743, "encoder_q-layer.7": 2716.8303, "encoder_q-layer.8": 2782.9297, "encoder_q-layer.9": 2357.5239, "epoch": 0.26, "inbatch_neg_score": 0.2097, "inbatch_pos_score": 0.7402, "learning_rate": 3.338888888888889e-05, "loss": 3.9757, "norm_diff": 0.0897, "norm_loss": 0.0, "num_token_doc": 66.7521, "num_token_overlap": 11.7, "num_token_query": 31.472, "num_token_union": 65.1473, "num_word_context": 202.4349, "num_word_doc": 49.8104, "num_word_query": 23.3842, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5096.5295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2095, "query_norm": 1.2843, "queue_k_norm": 1.3786, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.472, "sent_len_1": 66.7521, "sent_len_max_0": 127.45, "sent_len_max_1": 188.4538, "stdk": 0.0467, "stdq": 0.0414, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9777, "doc_norm": 1.382, "encoder_q-embeddings": 6605.7485, "encoder_q-layer.0": 4746.7148, "encoder_q-layer.1": 5505.9937, "encoder_q-layer.10": 2363.3733, "encoder_q-layer.11": 6238.9424, "encoder_q-layer.2": 5980.0928, "encoder_q-layer.3": 5301.1577, "encoder_q-layer.4": 4584.1069, "encoder_q-layer.5": 4368.9619, "encoder_q-layer.6": 3726.6831, "encoder_q-layer.7": 3227.9688, "encoder_q-layer.8": 3053.0876, "encoder_q-layer.9": 2514.4504, "epoch": 0.26, "inbatch_neg_score": 0.2125, "inbatch_pos_score": 0.7686, "learning_rate": 3.3333333333333335e-05, "loss": 3.9777, "norm_diff": 0.0737, "norm_loss": 0.0, "num_token_doc": 66.6279, "num_token_overlap": 11.6713, "num_token_query": 31.3679, "num_token_union": 65.0657, "num_word_context": 202.1893, "num_word_doc": 49.6983, "num_word_query": 23.2827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7261.4043, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2115, "query_norm": 1.3083, "queue_k_norm": 1.3779, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3679, "sent_len_1": 66.6279, "sent_len_max_0": 127.51, "sent_len_max_1": 189.1687, "stdk": 0.047, "stdq": 0.0424, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 29.1551, "dev_samples_per_second": 2.195, "dev_steps_per_second": 0.034, "epoch": 0.26, "step": 40000, "test_accuracy": 92.37060546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4509429633617401, "test_doc_norm": 1.3349823951721191, "test_inbatch_neg_score": 0.5115825533866882, "test_inbatch_pos_score": 1.3523807525634766, "test_loss": 0.4509429633617401, "test_loss_align": 1.0445733070373535, "test_loss_unif": 3.906904935836792, "test_loss_unif_q@queue": 3.906904458999634, "test_norm_diff": 0.056590545922517776, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1947774887084961, "test_query_norm": 1.3915729522705078, "test_queue_k_norm": 1.377737283706665, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04039176553487778, "test_stdq": 0.04059065505862236, "test_stdqueue_k": 0.046927254647016525, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.1551, "dev_samples_per_second": 2.195, "dev_steps_per_second": 0.034, "epoch": 0.26, "eval_beir-arguana_ndcg@10": 0.31632, "eval_beir-arguana_recall@10": 0.5505, "eval_beir-arguana_recall@100": 0.87127, "eval_beir-arguana_recall@20": 0.68777, "eval_beir-avg_ndcg@10": 0.35031108333333333, "eval_beir-avg_recall@10": 0.4192894166666667, "eval_beir-avg_recall@100": 0.6018753333333333, "eval_beir-avg_recall@20": 0.4809461666666667, "eval_beir-cqadupstack_ndcg@10": 0.23290083333333333, "eval_beir-cqadupstack_recall@10": 0.3202441666666667, "eval_beir-cqadupstack_recall@100": 0.5520733333333334, "eval_beir-cqadupstack_recall@20": 0.3864616666666667, "eval_beir-fiqa_ndcg@10": 0.21439, "eval_beir-fiqa_recall@10": 0.27344, "eval_beir-fiqa_recall@100": 0.53248, "eval_beir-fiqa_recall@20": 0.34864, "eval_beir-nfcorpus_ndcg@10": 0.28721, "eval_beir-nfcorpus_recall@10": 0.13498, "eval_beir-nfcorpus_recall@100": 0.27296, "eval_beir-nfcorpus_recall@20": 0.17203, "eval_beir-nq_ndcg@10": 0.25382, "eval_beir-nq_recall@10": 0.41802, "eval_beir-nq_recall@100": 0.76516, "eval_beir-nq_recall@20": 0.53764, "eval_beir-quora_ndcg@10": 0.74608, "eval_beir-quora_recall@10": 0.86001, "eval_beir-quora_recall@100": 0.96873, "eval_beir-quora_recall@20": 0.90877, "eval_beir-scidocs_ndcg@10": 0.13935, "eval_beir-scidocs_recall@10": 0.14417, "eval_beir-scidocs_recall@100": 0.34247, "eval_beir-scidocs_recall@20": 0.20012, "eval_beir-scifact_ndcg@10": 0.59641, "eval_beir-scifact_recall@10": 0.76489, "eval_beir-scifact_recall@100": 0.89589, "eval_beir-scifact_recall@20": 0.81567, "eval_beir-trec-covid_ndcg@10": 0.5292, "eval_beir-trec-covid_recall@10": 0.592, "eval_beir-trec-covid_recall@100": 0.3984, "eval_beir-trec-covid_recall@20": 0.551, "eval_beir-webis-touche2020_ndcg@10": 0.18743, "eval_beir-webis-touche2020_recall@10": 0.13464, "eval_beir-webis-touche2020_recall@100": 0.41932, "eval_beir-webis-touche2020_recall@20": 0.20136, "eval_senteval-avg_sts": 0.7545679467012998, "eval_senteval-sickr_spearman": 0.7208963038673581, "eval_senteval-stsb_spearman": 0.7882395895352413, "step": 40000, "test_accuracy": 92.37060546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4509429633617401, "test_doc_norm": 1.3349823951721191, "test_inbatch_neg_score": 0.5115825533866882, "test_inbatch_pos_score": 1.3523807525634766, "test_loss": 0.4509429633617401, "test_loss_align": 1.0445733070373535, "test_loss_unif": 3.906904935836792, "test_loss_unif_q@queue": 3.906904458999634, "test_norm_diff": 0.056590545922517776, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1947774887084961, "test_query_norm": 1.3915729522705078, "test_queue_k_norm": 1.377737283706665, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04039176553487778, "test_stdq": 0.04059065505862236, "test_stdqueue_k": 0.046927254647016525, "test_stdqueue_q": 0.0 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.0102, "doc_norm": 1.377, "encoder_q-embeddings": 2702.7751, "encoder_q-layer.0": 1856.5251, "encoder_q-layer.1": 1920.4205, "encoder_q-layer.10": 2636.323, "encoder_q-layer.11": 6266.4658, "encoder_q-layer.2": 2083.8201, "encoder_q-layer.3": 2071.6536, "encoder_q-layer.4": 1999.1143, "encoder_q-layer.5": 1896.2389, "encoder_q-layer.6": 2131.7175, "encoder_q-layer.7": 2221.261, "encoder_q-layer.8": 2549.1172, "encoder_q-layer.9": 2362.4927, "epoch": 0.26, "inbatch_neg_score": 0.2093, "inbatch_pos_score": 0.7363, "learning_rate": 3.327777777777778e-05, "loss": 4.0102, "norm_diff": 0.0798, "norm_loss": 0.0, "num_token_doc": 66.6907, "num_token_overlap": 11.6397, "num_token_query": 31.2549, "num_token_union": 65.0308, "num_word_context": 202.3372, "num_word_doc": 49.7713, "num_word_query": 23.2228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4270.5397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2103, "query_norm": 1.2971, "queue_k_norm": 1.3754, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2549, "sent_len_1": 66.6907, "sent_len_max_0": 127.5825, "sent_len_max_1": 189.4762, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9896, "doc_norm": 1.3721, "encoder_q-embeddings": 5316.1611, "encoder_q-layer.0": 3577.3333, "encoder_q-layer.1": 3966.7805, "encoder_q-layer.10": 2794.9714, "encoder_q-layer.11": 6556.6611, "encoder_q-layer.2": 4765.6733, "encoder_q-layer.3": 5393.9863, "encoder_q-layer.4": 5829.3662, "encoder_q-layer.5": 5289.5752, "encoder_q-layer.6": 4325.8862, "encoder_q-layer.7": 3174.3196, "encoder_q-layer.8": 2984.4031, "encoder_q-layer.9": 2537.8862, "epoch": 0.26, "inbatch_neg_score": 0.208, "inbatch_pos_score": 0.7427, "learning_rate": 3.322222222222222e-05, "loss": 3.9896, "norm_diff": 0.07, "norm_loss": 0.0, "num_token_doc": 66.8186, "num_token_overlap": 11.6597, "num_token_query": 31.376, "num_token_union": 65.1491, "num_word_context": 202.4153, "num_word_doc": 49.8881, "num_word_query": 23.3221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6768.8806, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2079, "query_norm": 1.3021, "queue_k_norm": 1.3755, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.376, "sent_len_1": 66.8186, "sent_len_max_0": 127.5312, "sent_len_max_1": 188.7113, "stdk": 0.0467, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.982, "doc_norm": 1.3848, "encoder_q-embeddings": 2984.4038, "encoder_q-layer.0": 2039.2418, "encoder_q-layer.1": 2270.783, "encoder_q-layer.10": 2412.1804, "encoder_q-layer.11": 6351.708, "encoder_q-layer.2": 2535.2529, "encoder_q-layer.3": 2483.5161, "encoder_q-layer.4": 2414.3708, "encoder_q-layer.5": 2371.5161, "encoder_q-layer.6": 2554.9851, "encoder_q-layer.7": 2629.1294, "encoder_q-layer.8": 2745.8789, "encoder_q-layer.9": 2427.7817, "epoch": 0.26, "inbatch_neg_score": 0.2037, "inbatch_pos_score": 0.751, "learning_rate": 3.316666666666667e-05, "loss": 3.982, "norm_diff": 0.0862, "norm_loss": 0.0, "num_token_doc": 66.781, "num_token_overlap": 11.6542, "num_token_query": 31.3327, "num_token_union": 65.0598, "num_word_context": 202.6282, "num_word_doc": 49.821, "num_word_query": 23.2644, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4569.2229, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2036, "query_norm": 1.2987, "queue_k_norm": 1.3767, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3327, "sent_len_1": 66.781, "sent_len_max_0": 127.5987, "sent_len_max_1": 191.475, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9732, "doc_norm": 1.3783, "encoder_q-embeddings": 2603.0061, "encoder_q-layer.0": 1737.3693, "encoder_q-layer.1": 1777.5459, "encoder_q-layer.10": 2784.459, "encoder_q-layer.11": 5951.4629, "encoder_q-layer.2": 1945.277, "encoder_q-layer.3": 1924.6803, "encoder_q-layer.4": 2011.2275, "encoder_q-layer.5": 2077.948, "encoder_q-layer.6": 2109.1133, "encoder_q-layer.7": 2317.2451, "encoder_q-layer.8": 2518.3, "encoder_q-layer.9": 2472.0095, "epoch": 0.26, "inbatch_neg_score": 0.2047, "inbatch_pos_score": 0.7563, "learning_rate": 3.311111111111112e-05, "loss": 3.9732, "norm_diff": 0.0812, "norm_loss": 0.0, "num_token_doc": 66.6513, "num_token_overlap": 11.6929, "num_token_query": 31.3408, "num_token_union": 65.0381, "num_word_context": 202.2851, "num_word_doc": 49.7503, "num_word_query": 23.261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4079.2058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2048, "query_norm": 1.2972, "queue_k_norm": 1.3775, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3408, "sent_len_1": 66.6513, "sent_len_max_0": 127.4575, "sent_len_max_1": 187.8875, "stdk": 0.0469, "stdq": 0.0422, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9826, "doc_norm": 1.3751, "encoder_q-embeddings": 3842.6082, "encoder_q-layer.0": 2677.4124, "encoder_q-layer.1": 2771.238, "encoder_q-layer.10": 3032.074, "encoder_q-layer.11": 6512.8486, "encoder_q-layer.2": 3142.1921, "encoder_q-layer.3": 3066.321, "encoder_q-layer.4": 2676.6077, "encoder_q-layer.5": 2461.6282, "encoder_q-layer.6": 2610.3389, "encoder_q-layer.7": 2684.3171, "encoder_q-layer.8": 3026.9524, "encoder_q-layer.9": 2620.707, "epoch": 0.26, "inbatch_neg_score": 0.2014, "inbatch_pos_score": 0.7363, "learning_rate": 3.3055555555555553e-05, "loss": 3.9826, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.7273, "num_token_overlap": 11.6127, "num_token_query": 31.2988, "num_token_union": 65.0933, "num_word_context": 202.3552, "num_word_doc": 49.7787, "num_word_query": 23.256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5101.6253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2013, "query_norm": 1.3155, "queue_k_norm": 1.3757, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2988, "sent_len_1": 66.7273, "sent_len_max_0": 127.6175, "sent_len_max_1": 190.0563, "stdk": 0.0468, "stdq": 0.0429, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9844, "doc_norm": 1.3788, "encoder_q-embeddings": 9446.3203, "encoder_q-layer.0": 6579.9229, "encoder_q-layer.1": 6373.3408, "encoder_q-layer.10": 2493.8127, "encoder_q-layer.11": 5940.1865, "encoder_q-layer.2": 7383.6284, "encoder_q-layer.3": 7366.9805, "encoder_q-layer.4": 6051.3223, "encoder_q-layer.5": 4895.0337, "encoder_q-layer.6": 4135.6538, "encoder_q-layer.7": 3679.011, "encoder_q-layer.8": 3319.9841, "encoder_q-layer.9": 2482.6157, "epoch": 0.26, "inbatch_neg_score": 0.2035, "inbatch_pos_score": 0.7559, "learning_rate": 3.3e-05, "loss": 3.9844, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.9714, "num_token_overlap": 11.6983, "num_token_query": 31.4731, "num_token_union": 65.299, "num_word_context": 202.8873, "num_word_doc": 49.9515, "num_word_query": 23.3718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8945.377, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2036, "query_norm": 1.3176, "queue_k_norm": 1.3769, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4731, "sent_len_1": 66.9714, "sent_len_max_0": 127.47, "sent_len_max_1": 192.0513, "stdk": 0.047, "stdq": 0.0428, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 3.9848, "doc_norm": 1.3786, "encoder_q-embeddings": 7249.9702, "encoder_q-layer.0": 6023.4263, "encoder_q-layer.1": 6452.2935, "encoder_q-layer.10": 2446.6003, "encoder_q-layer.11": 5919.7393, "encoder_q-layer.2": 6397.623, "encoder_q-layer.3": 6384.7529, "encoder_q-layer.4": 6243.1172, "encoder_q-layer.5": 3971.4783, "encoder_q-layer.6": 3505.4417, "encoder_q-layer.7": 3140.1438, "encoder_q-layer.8": 3085.1406, "encoder_q-layer.9": 2543.8994, "epoch": 0.26, "inbatch_neg_score": 0.1997, "inbatch_pos_score": 0.7168, "learning_rate": 3.2944444444444445e-05, "loss": 3.9848, "norm_diff": 0.0923, "norm_loss": 0.0, "num_token_doc": 66.9092, "num_token_overlap": 11.6228, "num_token_query": 31.229, "num_token_union": 65.1377, "num_word_context": 202.3827, "num_word_doc": 49.9271, "num_word_query": 23.1871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7875.5094, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1989, "query_norm": 1.2864, "queue_k_norm": 1.3753, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.229, "sent_len_1": 66.9092, "sent_len_max_0": 127.4688, "sent_len_max_1": 190.4688, "stdk": 0.0469, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9691, "doc_norm": 1.3739, "encoder_q-embeddings": 3967.4258, "encoder_q-layer.0": 2742.4614, "encoder_q-layer.1": 2875.49, "encoder_q-layer.10": 2392.3281, "encoder_q-layer.11": 5765.7466, "encoder_q-layer.2": 3219.7393, "encoder_q-layer.3": 3391.0596, "encoder_q-layer.4": 3284.2605, "encoder_q-layer.5": 3004.7805, "encoder_q-layer.6": 2957.1384, "encoder_q-layer.7": 2938.6853, "encoder_q-layer.8": 2904.6709, "encoder_q-layer.9": 2274.6111, "epoch": 0.27, "inbatch_neg_score": 0.1977, "inbatch_pos_score": 0.7363, "learning_rate": 3.2888888888888894e-05, "loss": 3.9691, "norm_diff": 0.0719, "norm_loss": 0.0, "num_token_doc": 66.9658, "num_token_overlap": 11.7244, "num_token_query": 31.433, "num_token_union": 65.2346, "num_word_context": 202.4835, "num_word_doc": 49.9907, "num_word_query": 23.3481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5057.7241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1995, "query_norm": 1.3019, "queue_k_norm": 1.3771, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.433, "sent_len_1": 66.9658, "sent_len_max_0": 127.5225, "sent_len_max_1": 189.5913, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9832, "doc_norm": 1.3734, "encoder_q-embeddings": 3378.6357, "encoder_q-layer.0": 2271.5435, "encoder_q-layer.1": 2451.7927, "encoder_q-layer.10": 2498.7705, "encoder_q-layer.11": 6073.3511, "encoder_q-layer.2": 2644.2305, "encoder_q-layer.3": 2629.022, "encoder_q-layer.4": 2790.1335, "encoder_q-layer.5": 2498.8909, "encoder_q-layer.6": 2698.5352, "encoder_q-layer.7": 2726.8662, "encoder_q-layer.8": 2741.5742, "encoder_q-layer.9": 2386.3662, "epoch": 0.27, "inbatch_neg_score": 0.1974, "inbatch_pos_score": 0.7339, "learning_rate": 3.283333333333333e-05, "loss": 3.9832, "norm_diff": 0.0706, "norm_loss": 0.0, "num_token_doc": 66.9756, "num_token_overlap": 11.6388, "num_token_query": 31.229, "num_token_union": 65.1465, "num_word_context": 202.0137, "num_word_doc": 49.9857, "num_word_query": 23.1758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4730.8208, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1986, "query_norm": 1.3029, "queue_k_norm": 1.3749, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.229, "sent_len_1": 66.9756, "sent_len_max_0": 127.425, "sent_len_max_1": 189.8537, "stdk": 0.0467, "stdq": 0.0427, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9646, "doc_norm": 1.3779, "encoder_q-embeddings": 2634.1135, "encoder_q-layer.0": 1739.3479, "encoder_q-layer.1": 1864.3795, "encoder_q-layer.10": 2614.543, "encoder_q-layer.11": 6210.2061, "encoder_q-layer.2": 2103.7029, "encoder_q-layer.3": 2105.9219, "encoder_q-layer.4": 2258.8506, "encoder_q-layer.5": 2186.5083, "encoder_q-layer.6": 2434.7244, "encoder_q-layer.7": 2399.9482, "encoder_q-layer.8": 2683.2246, "encoder_q-layer.9": 2299.823, "epoch": 0.27, "inbatch_neg_score": 0.2023, "inbatch_pos_score": 0.7388, "learning_rate": 3.277777777777778e-05, "loss": 3.9646, "norm_diff": 0.0847, "norm_loss": 0.0, "num_token_doc": 66.7182, "num_token_overlap": 11.6746, "num_token_query": 31.4258, "num_token_union": 65.1244, "num_word_context": 202.0427, "num_word_doc": 49.7474, "num_word_query": 23.3419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4305.5928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2025, "query_norm": 1.2932, "queue_k_norm": 1.3732, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4258, "sent_len_1": 66.7182, "sent_len_max_0": 127.4163, "sent_len_max_1": 190.0588, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.9685, "doc_norm": 1.3706, "encoder_q-embeddings": 5247.0547, "encoder_q-layer.0": 3548.0991, "encoder_q-layer.1": 3628.5005, "encoder_q-layer.10": 4720.4712, "encoder_q-layer.11": 11459.5977, "encoder_q-layer.2": 3915.127, "encoder_q-layer.3": 3849.3774, "encoder_q-layer.4": 3827.4434, "encoder_q-layer.5": 3876.4751, "encoder_q-layer.6": 4036.8672, "encoder_q-layer.7": 4435.7109, "encoder_q-layer.8": 5122.8462, "encoder_q-layer.9": 4675.2935, "epoch": 0.27, "inbatch_neg_score": 0.1884, "inbatch_pos_score": 0.7231, "learning_rate": 3.272222222222223e-05, "loss": 3.9685, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.7375, "num_token_overlap": 11.6753, "num_token_query": 31.3939, "num_token_union": 65.1048, "num_word_context": 202.2404, "num_word_doc": 49.7907, "num_word_query": 23.3219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7994.4507, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1873, "query_norm": 1.2993, "queue_k_norm": 1.3748, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3939, "sent_len_1": 66.7375, "sent_len_max_0": 127.4425, "sent_len_max_1": 189.8175, "stdk": 0.0467, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9801, "doc_norm": 1.3701, "encoder_q-embeddings": 6556.1362, "encoder_q-layer.0": 4950.9941, "encoder_q-layer.1": 4984.2778, "encoder_q-layer.10": 4919.5464, "encoder_q-layer.11": 12269.5225, "encoder_q-layer.2": 5299.9902, "encoder_q-layer.3": 5068.8682, "encoder_q-layer.4": 5468.2319, "encoder_q-layer.5": 5526.1636, "encoder_q-layer.6": 5753.7017, "encoder_q-layer.7": 5230.8398, "encoder_q-layer.8": 5682.0576, "encoder_q-layer.9": 4771.1235, "epoch": 0.27, "inbatch_neg_score": 0.1821, "inbatch_pos_score": 0.7305, "learning_rate": 3.266666666666667e-05, "loss": 3.9801, "norm_diff": 0.0731, "norm_loss": 0.0, "num_token_doc": 66.775, "num_token_overlap": 11.7405, "num_token_query": 31.5951, "num_token_union": 65.222, "num_word_context": 202.3661, "num_word_doc": 49.8149, "num_word_query": 23.4937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9534.6198, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1821, "query_norm": 1.2969, "queue_k_norm": 1.3736, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5951, "sent_len_1": 66.775, "sent_len_max_0": 127.3775, "sent_len_max_1": 188.9625, "stdk": 0.0467, "stdq": 0.0426, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9596, "doc_norm": 1.3793, "encoder_q-embeddings": 9995.7871, "encoder_q-layer.0": 6908.52, "encoder_q-layer.1": 7533.0767, "encoder_q-layer.10": 5411.4194, "encoder_q-layer.11": 13395.6191, "encoder_q-layer.2": 9008.4062, "encoder_q-layer.3": 9663.9121, "encoder_q-layer.4": 9263.0156, "encoder_q-layer.5": 8454.8105, "encoder_q-layer.6": 8552.0264, "encoder_q-layer.7": 7612.5474, "encoder_q-layer.8": 7868.3613, "encoder_q-layer.9": 5735.2261, "epoch": 0.27, "inbatch_neg_score": 0.1811, "inbatch_pos_score": 0.7246, "learning_rate": 3.261111111111111e-05, "loss": 3.9596, "norm_diff": 0.0976, "norm_loss": 0.0, "num_token_doc": 66.8375, "num_token_overlap": 11.6745, "num_token_query": 31.2037, "num_token_union": 65.0251, "num_word_context": 202.1533, "num_word_doc": 49.8749, "num_word_query": 23.166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13127.8523, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1807, "query_norm": 1.2817, "queue_k_norm": 1.374, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2037, "sent_len_1": 66.8375, "sent_len_max_0": 127.2763, "sent_len_max_1": 190.9338, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 3.9715, "doc_norm": 1.3779, "encoder_q-embeddings": 7206.5835, "encoder_q-layer.0": 4813.8452, "encoder_q-layer.1": 5007.5054, "encoder_q-layer.10": 5300.5649, "encoder_q-layer.11": 12859.8721, "encoder_q-layer.2": 5781.8955, "encoder_q-layer.3": 6641.3574, "encoder_q-layer.4": 7191.3931, "encoder_q-layer.5": 7489.7827, "encoder_q-layer.6": 8623.917, "encoder_q-layer.7": 8140.5859, "encoder_q-layer.8": 8229.6582, "encoder_q-layer.9": 5949.1206, "epoch": 0.27, "inbatch_neg_score": 0.1866, "inbatch_pos_score": 0.7158, "learning_rate": 3.2555555555555555e-05, "loss": 3.9715, "norm_diff": 0.0751, "norm_loss": 0.0, "num_token_doc": 66.9759, "num_token_overlap": 11.6439, "num_token_query": 31.261, "num_token_union": 65.1446, "num_word_context": 202.3077, "num_word_doc": 49.9301, "num_word_query": 23.2018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11119.9935, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1857, "query_norm": 1.3027, "queue_k_norm": 1.3739, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.261, "sent_len_1": 66.9759, "sent_len_max_0": 127.4575, "sent_len_max_1": 191.3262, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9913, "doc_norm": 1.3686, "encoder_q-embeddings": 6077.3623, "encoder_q-layer.0": 4148.2847, "encoder_q-layer.1": 4627.8228, "encoder_q-layer.10": 5212.0425, "encoder_q-layer.11": 12382.5039, "encoder_q-layer.2": 5318.9692, "encoder_q-layer.3": 5085.8735, "encoder_q-layer.4": 4820.5537, "encoder_q-layer.5": 4716.0347, "encoder_q-layer.6": 4546.5396, "encoder_q-layer.7": 4978.9028, "encoder_q-layer.8": 5477.3164, "encoder_q-layer.9": 4908.4746, "epoch": 0.27, "inbatch_neg_score": 0.1826, "inbatch_pos_score": 0.7241, "learning_rate": 3.2500000000000004e-05, "loss": 3.9913, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.7786, "num_token_overlap": 11.6296, "num_token_query": 31.2407, "num_token_union": 65.0922, "num_word_context": 202.5468, "num_word_doc": 49.8435, "num_word_query": 23.2015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9041.1364, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1829, "query_norm": 1.3058, "queue_k_norm": 1.3724, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2407, "sent_len_1": 66.7786, "sent_len_max_0": 127.415, "sent_len_max_1": 189.3175, "stdk": 0.0468, "stdq": 0.043, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9587, "doc_norm": 1.3738, "encoder_q-embeddings": 51909.0742, "encoder_q-layer.0": 42844.75, "encoder_q-layer.1": 44553.4727, "encoder_q-layer.10": 5216.7075, "encoder_q-layer.11": 12722.2354, "encoder_q-layer.2": 59225.0117, "encoder_q-layer.3": 47847.5312, "encoder_q-layer.4": 37593.25, "encoder_q-layer.5": 24559.1055, "encoder_q-layer.6": 16613.0293, "encoder_q-layer.7": 14106.9033, "encoder_q-layer.8": 7547.1416, "encoder_q-layer.9": 4817.7334, "epoch": 0.27, "inbatch_neg_score": 0.1895, "inbatch_pos_score": 0.7197, "learning_rate": 3.2444444444444446e-05, "loss": 3.9587, "norm_diff": 0.0895, "norm_loss": 0.0, "num_token_doc": 66.7427, "num_token_overlap": 11.6885, "num_token_query": 31.3883, "num_token_union": 65.1003, "num_word_context": 202.2015, "num_word_doc": 49.8232, "num_word_query": 23.2939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 53482.1296, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.1884, "query_norm": 1.2843, "queue_k_norm": 1.3701, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3883, "sent_len_1": 66.7427, "sent_len_max_0": 127.4562, "sent_len_max_1": 187.9338, "stdk": 0.0469, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9722, "doc_norm": 1.3725, "encoder_q-embeddings": 4968.6934, "encoder_q-layer.0": 3223.4377, "encoder_q-layer.1": 3400.2634, "encoder_q-layer.10": 5716.2031, "encoder_q-layer.11": 12382.3975, "encoder_q-layer.2": 3844.4685, "encoder_q-layer.3": 3849.4656, "encoder_q-layer.4": 4013.8562, "encoder_q-layer.5": 3877.4368, "encoder_q-layer.6": 4368.1255, "encoder_q-layer.7": 4945.6265, "encoder_q-layer.8": 6140.02, "encoder_q-layer.9": 5544.6958, "epoch": 0.27, "inbatch_neg_score": 0.1877, "inbatch_pos_score": 0.7456, "learning_rate": 3.238888888888889e-05, "loss": 3.9722, "norm_diff": 0.093, "norm_loss": 0.0, "num_token_doc": 66.5992, "num_token_overlap": 11.634, "num_token_query": 31.2662, "num_token_union": 64.9858, "num_word_context": 202.0027, "num_word_doc": 49.697, "num_word_query": 23.2135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8392.043, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1879, "query_norm": 1.2795, "queue_k_norm": 1.3709, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2662, "sent_len_1": 66.5992, "sent_len_max_0": 127.6012, "sent_len_max_1": 188.985, "stdk": 0.0469, "stdq": 0.0418, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.97, "doc_norm": 1.3762, "encoder_q-embeddings": 7297.3101, "encoder_q-layer.0": 5549.3887, "encoder_q-layer.1": 5739.6494, "encoder_q-layer.10": 4666.2935, "encoder_q-layer.11": 12113.2842, "encoder_q-layer.2": 6732.6948, "encoder_q-layer.3": 6609.5776, "encoder_q-layer.4": 6301.4575, "encoder_q-layer.5": 5734.3584, "encoder_q-layer.6": 5631.4712, "encoder_q-layer.7": 5891.1943, "encoder_q-layer.8": 6454.9565, "encoder_q-layer.9": 4957.48, "epoch": 0.27, "inbatch_neg_score": 0.1901, "inbatch_pos_score": 0.7168, "learning_rate": 3.233333333333333e-05, "loss": 3.97, "norm_diff": 0.093, "norm_loss": 0.0, "num_token_doc": 66.7387, "num_token_overlap": 11.6563, "num_token_query": 31.2877, "num_token_union": 65.0584, "num_word_context": 202.4737, "num_word_doc": 49.7953, "num_word_query": 23.23, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10232.2772, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1888, "query_norm": 1.2832, "queue_k_norm": 1.3722, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2877, "sent_len_1": 66.7387, "sent_len_max_0": 127.34, "sent_len_max_1": 187.6463, "stdk": 0.0471, "stdq": 0.0419, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9522, "doc_norm": 1.3678, "encoder_q-embeddings": 4985.8647, "encoder_q-layer.0": 3707.5469, "encoder_q-layer.1": 4474.3613, "encoder_q-layer.10": 2641.967, "encoder_q-layer.11": 6331.1841, "encoder_q-layer.2": 4656.9766, "encoder_q-layer.3": 4491.8848, "encoder_q-layer.4": 3935.4998, "encoder_q-layer.5": 3710.1289, "encoder_q-layer.6": 3942.8967, "encoder_q-layer.7": 3241.3491, "encoder_q-layer.8": 3201.7422, "encoder_q-layer.9": 2583.6382, "epoch": 0.27, "inbatch_neg_score": 0.1852, "inbatch_pos_score": 0.7129, "learning_rate": 3.227777777777778e-05, "loss": 3.9522, "norm_diff": 0.0768, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 11.6347, "num_token_query": 31.2632, "num_token_union": 65.1113, "num_word_context": 202.3584, "num_word_doc": 49.8779, "num_word_query": 23.206, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6297.919, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1849, "query_norm": 1.291, "queue_k_norm": 1.3709, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2632, "sent_len_1": 66.8467, "sent_len_max_0": 127.6363, "sent_len_max_1": 189.6262, "stdk": 0.0468, "stdq": 0.0423, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9723, "doc_norm": 1.3738, "encoder_q-embeddings": 2504.4285, "encoder_q-layer.0": 1650.1906, "encoder_q-layer.1": 1697.9933, "encoder_q-layer.10": 2436.5671, "encoder_q-layer.11": 5970.2612, "encoder_q-layer.2": 1895.4457, "encoder_q-layer.3": 1937.5371, "encoder_q-layer.4": 2007.9548, "encoder_q-layer.5": 1930.2131, "encoder_q-layer.6": 2007.281, "encoder_q-layer.7": 2144.7649, "encoder_q-layer.8": 2613.6741, "encoder_q-layer.9": 2426.0183, "epoch": 0.27, "inbatch_neg_score": 0.1885, "inbatch_pos_score": 0.7373, "learning_rate": 3.222222222222223e-05, "loss": 3.9723, "norm_diff": 0.0904, "norm_loss": 0.0, "num_token_doc": 66.8825, "num_token_overlap": 11.6216, "num_token_query": 31.3144, "num_token_union": 65.2189, "num_word_context": 202.5734, "num_word_doc": 49.9086, "num_word_query": 23.2422, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4010.241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.2834, "queue_k_norm": 1.3703, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3144, "sent_len_1": 66.8825, "sent_len_max_0": 127.485, "sent_len_max_1": 188.1213, "stdk": 0.047, "stdq": 0.042, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9511, "doc_norm": 1.3671, "encoder_q-embeddings": 3145.5186, "encoder_q-layer.0": 2123.8838, "encoder_q-layer.1": 2187.2107, "encoder_q-layer.10": 2442.3164, "encoder_q-layer.11": 6113.1318, "encoder_q-layer.2": 2412.4292, "encoder_q-layer.3": 2530.54, "encoder_q-layer.4": 2657.6362, "encoder_q-layer.5": 2875.2595, "encoder_q-layer.6": 2673.7432, "encoder_q-layer.7": 3050.2471, "encoder_q-layer.8": 3299.688, "encoder_q-layer.9": 2563.9094, "epoch": 0.27, "inbatch_neg_score": 0.1898, "inbatch_pos_score": 0.7256, "learning_rate": 3.2166666666666665e-05, "loss": 3.9511, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 66.8845, "num_token_overlap": 11.6603, "num_token_query": 31.3023, "num_token_union": 65.1047, "num_word_context": 202.2085, "num_word_doc": 49.864, "num_word_query": 23.2732, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4700.6954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1892, "query_norm": 1.2711, "queue_k_norm": 1.3683, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3023, "sent_len_1": 66.8845, "sent_len_max_0": 127.5987, "sent_len_max_1": 190.2512, "stdk": 0.0468, "stdq": 0.0413, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.9572, "doc_norm": 1.3664, "encoder_q-embeddings": 3208.2146, "encoder_q-layer.0": 2084.877, "encoder_q-layer.1": 2085.7056, "encoder_q-layer.10": 2609.4055, "encoder_q-layer.11": 6259.5991, "encoder_q-layer.2": 2342.5183, "encoder_q-layer.3": 2310.8914, "encoder_q-layer.4": 2379.6855, "encoder_q-layer.5": 2483.7581, "encoder_q-layer.6": 2748.2961, "encoder_q-layer.7": 3212.803, "encoder_q-layer.8": 3225.8469, "encoder_q-layer.9": 2515.3667, "epoch": 0.27, "inbatch_neg_score": 0.191, "inbatch_pos_score": 0.7339, "learning_rate": 3.2111111111111114e-05, "loss": 3.9572, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 66.7305, "num_token_overlap": 11.6387, "num_token_query": 31.3204, "num_token_union": 65.0853, "num_word_context": 202.194, "num_word_doc": 49.827, "num_word_query": 23.2597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4634.6286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1912, "query_norm": 1.2887, "queue_k_norm": 1.3702, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3204, "sent_len_1": 66.7305, "sent_len_max_0": 127.37, "sent_len_max_1": 189.4013, "stdk": 0.0468, "stdq": 0.0419, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9593, "doc_norm": 1.3686, "encoder_q-embeddings": 2715.8728, "encoder_q-layer.0": 1825.022, "encoder_q-layer.1": 1863.9052, "encoder_q-layer.10": 2539.8936, "encoder_q-layer.11": 6365.7524, "encoder_q-layer.2": 2089.0562, "encoder_q-layer.3": 2117.4578, "encoder_q-layer.4": 2093.6387, "encoder_q-layer.5": 2044.4429, "encoder_q-layer.6": 2263.9736, "encoder_q-layer.7": 2294.3992, "encoder_q-layer.8": 2587.1467, "encoder_q-layer.9": 2356.397, "epoch": 0.28, "inbatch_neg_score": 0.2008, "inbatch_pos_score": 0.7529, "learning_rate": 3.2055555555555556e-05, "loss": 3.9593, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 66.8031, "num_token_overlap": 11.6631, "num_token_query": 31.3378, "num_token_union": 65.0969, "num_word_context": 202.2396, "num_word_doc": 49.8148, "num_word_query": 23.2782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4270.2624, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2, "query_norm": 1.3082, "queue_k_norm": 1.3691, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3378, "sent_len_1": 66.8031, "sent_len_max_0": 127.335, "sent_len_max_1": 190.5513, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9539, "doc_norm": 1.3669, "encoder_q-embeddings": 2369.5525, "encoder_q-layer.0": 1540.7566, "encoder_q-layer.1": 1607.7981, "encoder_q-layer.10": 2604.78, "encoder_q-layer.11": 6019.6499, "encoder_q-layer.2": 1710.3291, "encoder_q-layer.3": 1693.953, "encoder_q-layer.4": 1750.5425, "encoder_q-layer.5": 1729.3564, "encoder_q-layer.6": 1946.1316, "encoder_q-layer.7": 2137.2214, "encoder_q-layer.8": 2582.179, "encoder_q-layer.9": 2371.0376, "epoch": 0.28, "inbatch_neg_score": 0.1985, "inbatch_pos_score": 0.73, "learning_rate": 3.2000000000000005e-05, "loss": 3.9539, "norm_diff": 0.0563, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 11.6566, "num_token_query": 31.3994, "num_token_union": 65.1224, "num_word_context": 202.3579, "num_word_doc": 49.7846, "num_word_query": 23.3088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3962.8958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1975, "query_norm": 1.3106, "queue_k_norm": 1.3692, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3994, "sent_len_1": 66.7393, "sent_len_max_0": 127.4688, "sent_len_max_1": 190.1538, "stdk": 0.0468, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.9653, "doc_norm": 1.3786, "encoder_q-embeddings": 2480.7698, "encoder_q-layer.0": 1607.9827, "encoder_q-layer.1": 1668.0978, "encoder_q-layer.10": 2507.0237, "encoder_q-layer.11": 6235.1553, "encoder_q-layer.2": 1895.1941, "encoder_q-layer.3": 1998.5969, "encoder_q-layer.4": 2065.9158, "encoder_q-layer.5": 2092.2988, "encoder_q-layer.6": 2297.2979, "encoder_q-layer.7": 2495.4854, "encoder_q-layer.8": 2673.3845, "encoder_q-layer.9": 2476.6841, "epoch": 0.28, "inbatch_neg_score": 0.2027, "inbatch_pos_score": 0.7441, "learning_rate": 3.194444444444444e-05, "loss": 3.9653, "norm_diff": 0.0673, "norm_loss": 0.0, "num_token_doc": 66.7212, "num_token_overlap": 11.5858, "num_token_query": 31.1513, "num_token_union": 65.0333, "num_word_context": 202.1141, "num_word_doc": 49.7828, "num_word_query": 23.1128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4144.1181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2018, "query_norm": 1.3113, "queue_k_norm": 1.3704, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1513, "sent_len_1": 66.7212, "sent_len_max_0": 127.4587, "sent_len_max_1": 189.3313, "stdk": 0.0472, "stdq": 0.0427, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9418, "doc_norm": 1.3689, "encoder_q-embeddings": 3794.1968, "encoder_q-layer.0": 2769.7358, "encoder_q-layer.1": 3113.6187, "encoder_q-layer.10": 2590.2046, "encoder_q-layer.11": 6329.6567, "encoder_q-layer.2": 3382.7935, "encoder_q-layer.3": 3247.3506, "encoder_q-layer.4": 3021.7756, "encoder_q-layer.5": 2828.7056, "encoder_q-layer.6": 2783.6689, "encoder_q-layer.7": 2466.0669, "encoder_q-layer.8": 2746.6284, "encoder_q-layer.9": 2442.845, "epoch": 0.28, "inbatch_neg_score": 0.1995, "inbatch_pos_score": 0.7412, "learning_rate": 3.188888888888889e-05, "loss": 3.9418, "norm_diff": 0.076, "norm_loss": 0.0, "num_token_doc": 66.6472, "num_token_overlap": 11.6333, "num_token_query": 31.3267, "num_token_union": 65.0554, "num_word_context": 202.2711, "num_word_doc": 49.7594, "num_word_query": 23.2673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5122.1382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1985, "query_norm": 1.2928, "queue_k_norm": 1.3714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3267, "sent_len_1": 66.6472, "sent_len_max_0": 127.4425, "sent_len_max_1": 189.7438, "stdk": 0.0468, "stdq": 0.042, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9456, "doc_norm": 1.3726, "encoder_q-embeddings": 2920.8071, "encoder_q-layer.0": 1982.8514, "encoder_q-layer.1": 2001.6207, "encoder_q-layer.10": 2760.738, "encoder_q-layer.11": 6390.2583, "encoder_q-layer.2": 2348.7375, "encoder_q-layer.3": 2382.8115, "encoder_q-layer.4": 2567.2859, "encoder_q-layer.5": 2531.5352, "encoder_q-layer.6": 2640.7556, "encoder_q-layer.7": 2686.311, "encoder_q-layer.8": 3023.7751, "encoder_q-layer.9": 2540.9497, "epoch": 0.28, "inbatch_neg_score": 0.196, "inbatch_pos_score": 0.7417, "learning_rate": 3.183333333333334e-05, "loss": 3.9456, "norm_diff": 0.0974, "norm_loss": 0.0, "num_token_doc": 66.5502, "num_token_overlap": 11.7025, "num_token_query": 31.4101, "num_token_union": 64.9848, "num_word_context": 201.8645, "num_word_doc": 49.6405, "num_word_query": 23.3404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4549.8379, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1969, "query_norm": 1.2752, "queue_k_norm": 1.3703, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4101, "sent_len_1": 66.5502, "sent_len_max_0": 127.43, "sent_len_max_1": 189.1825, "stdk": 0.047, "stdq": 0.0416, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.9525, "doc_norm": 1.3696, "encoder_q-embeddings": 4231.7046, "encoder_q-layer.0": 2920.1567, "encoder_q-layer.1": 3002.8174, "encoder_q-layer.10": 2510.4546, "encoder_q-layer.11": 6322.1699, "encoder_q-layer.2": 3578.2954, "encoder_q-layer.3": 3507.7217, "encoder_q-layer.4": 3072.0872, "encoder_q-layer.5": 2573.5234, "encoder_q-layer.6": 2557.8611, "encoder_q-layer.7": 2416.7839, "encoder_q-layer.8": 2574.8987, "encoder_q-layer.9": 2256.5964, "epoch": 0.28, "inbatch_neg_score": 0.1976, "inbatch_pos_score": 0.7285, "learning_rate": 3.177777777777778e-05, "loss": 3.9525, "norm_diff": 0.097, "norm_loss": 0.0, "num_token_doc": 66.7928, "num_token_overlap": 11.6837, "num_token_query": 31.3211, "num_token_union": 65.1003, "num_word_context": 202.4912, "num_word_doc": 49.8534, "num_word_query": 23.2525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5238.5061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1968, "query_norm": 1.2726, "queue_k_norm": 1.3716, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3211, "sent_len_1": 66.7928, "sent_len_max_0": 127.54, "sent_len_max_1": 188.6438, "stdk": 0.0468, "stdq": 0.0414, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9573, "doc_norm": 1.3752, "encoder_q-embeddings": 3360.0349, "encoder_q-layer.0": 2186.8496, "encoder_q-layer.1": 2390.9087, "encoder_q-layer.10": 2578.6042, "encoder_q-layer.11": 6113.6494, "encoder_q-layer.2": 2833.2078, "encoder_q-layer.3": 2764.1074, "encoder_q-layer.4": 2875.0935, "encoder_q-layer.5": 2686.9619, "encoder_q-layer.6": 2737.875, "encoder_q-layer.7": 2652.511, "encoder_q-layer.8": 2868.3721, "encoder_q-layer.9": 2533.2783, "epoch": 0.28, "inbatch_neg_score": 0.1963, "inbatch_pos_score": 0.7329, "learning_rate": 3.1722222222222224e-05, "loss": 3.9573, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.819, "num_token_overlap": 11.6712, "num_token_query": 31.5521, "num_token_union": 65.2937, "num_word_context": 202.3444, "num_word_doc": 49.8425, "num_word_query": 23.4342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4700.637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1959, "query_norm": 1.3081, "queue_k_norm": 1.3709, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5521, "sent_len_1": 66.819, "sent_len_max_0": 127.565, "sent_len_max_1": 190.6, "stdk": 0.0471, "stdq": 0.0428, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9504, "doc_norm": 1.3702, "encoder_q-embeddings": 2702.7056, "encoder_q-layer.0": 1724.4336, "encoder_q-layer.1": 1840.4644, "encoder_q-layer.10": 2623.5117, "encoder_q-layer.11": 6231.0376, "encoder_q-layer.2": 2092.3943, "encoder_q-layer.3": 2161.7686, "encoder_q-layer.4": 2275.6885, "encoder_q-layer.5": 2157.4143, "encoder_q-layer.6": 2281.3037, "encoder_q-layer.7": 2352.1978, "encoder_q-layer.8": 2610.4639, "encoder_q-layer.9": 2414.4724, "epoch": 0.28, "inbatch_neg_score": 0.1975, "inbatch_pos_score": 0.7637, "learning_rate": 3.1666666666666666e-05, "loss": 3.9504, "norm_diff": 0.056, "norm_loss": 0.0, "num_token_doc": 66.8149, "num_token_overlap": 11.7249, "num_token_query": 31.5327, "num_token_union": 65.2197, "num_word_context": 202.7621, "num_word_doc": 49.8466, "num_word_query": 23.4467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4337.865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1974, "query_norm": 1.3142, "queue_k_norm": 1.3704, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5327, "sent_len_1": 66.8149, "sent_len_max_0": 127.5362, "sent_len_max_1": 188.8162, "stdk": 0.0469, "stdq": 0.0429, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9268, "doc_norm": 1.3674, "encoder_q-embeddings": 2640.7046, "encoder_q-layer.0": 1734.4872, "encoder_q-layer.1": 1901.9331, "encoder_q-layer.10": 2622.9653, "encoder_q-layer.11": 6010.1914, "encoder_q-layer.2": 2164.8467, "encoder_q-layer.3": 2325.4678, "encoder_q-layer.4": 2430.1914, "encoder_q-layer.5": 2262.8445, "encoder_q-layer.6": 2502.0457, "encoder_q-layer.7": 2402.7368, "encoder_q-layer.8": 2640.3743, "encoder_q-layer.9": 2356.5613, "epoch": 0.28, "inbatch_neg_score": 0.1958, "inbatch_pos_score": 0.7388, "learning_rate": 3.1611111111111115e-05, "loss": 3.9268, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 67.2232, "num_token_overlap": 11.666, "num_token_query": 31.3474, "num_token_union": 65.4119, "num_word_context": 202.6719, "num_word_doc": 50.1779, "num_word_query": 23.2698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4290.3219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1963, "query_norm": 1.3098, "queue_k_norm": 1.372, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3474, "sent_len_1": 67.2232, "sent_len_max_0": 127.5, "sent_len_max_1": 188.9613, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9556, "doc_norm": 1.3653, "encoder_q-embeddings": 3187.0183, "encoder_q-layer.0": 2206.5151, "encoder_q-layer.1": 2200.6401, "encoder_q-layer.10": 2792.8909, "encoder_q-layer.11": 6314.8301, "encoder_q-layer.2": 2517.5449, "encoder_q-layer.3": 2870.5146, "encoder_q-layer.4": 3026.24, "encoder_q-layer.5": 2594.3972, "encoder_q-layer.6": 2456.6501, "encoder_q-layer.7": 2570.4678, "encoder_q-layer.8": 3112.8562, "encoder_q-layer.9": 2610.8257, "epoch": 0.28, "inbatch_neg_score": 0.1966, "inbatch_pos_score": 0.729, "learning_rate": 3.155555555555556e-05, "loss": 3.9556, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.6711, "num_token_overlap": 11.6513, "num_token_query": 31.336, "num_token_union": 65.0699, "num_word_context": 202.4981, "num_word_doc": 49.7793, "num_word_query": 23.268, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4752.06, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1969, "query_norm": 1.2968, "queue_k_norm": 1.372, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.336, "sent_len_1": 66.6711, "sent_len_max_0": 127.5575, "sent_len_max_1": 187.985, "stdk": 0.0467, "stdq": 0.0422, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.9392, "doc_norm": 1.3801, "encoder_q-embeddings": 2949.2808, "encoder_q-layer.0": 1994.9994, "encoder_q-layer.1": 2256.6199, "encoder_q-layer.10": 2439.8413, "encoder_q-layer.11": 6274.1689, "encoder_q-layer.2": 2575.6426, "encoder_q-layer.3": 2705.9832, "encoder_q-layer.4": 2736.2185, "encoder_q-layer.5": 2560.6995, "encoder_q-layer.6": 2786.5757, "encoder_q-layer.7": 3222.0964, "encoder_q-layer.8": 3221.7478, "encoder_q-layer.9": 2420.5732, "epoch": 0.28, "inbatch_neg_score": 0.2006, "inbatch_pos_score": 0.7583, "learning_rate": 3.15e-05, "loss": 3.9392, "norm_diff": 0.0602, "norm_loss": 0.0, "num_token_doc": 66.8417, "num_token_overlap": 11.6718, "num_token_query": 31.2011, "num_token_union": 65.0519, "num_word_context": 202.2699, "num_word_doc": 49.9354, "num_word_query": 23.1613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4642.296, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2019, "query_norm": 1.3199, "queue_k_norm": 1.3724, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2011, "sent_len_1": 66.8417, "sent_len_max_0": 127.4275, "sent_len_max_1": 187.46, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9554, "doc_norm": 1.3778, "encoder_q-embeddings": 5826.2134, "encoder_q-layer.0": 3677.8813, "encoder_q-layer.1": 3468.8228, "encoder_q-layer.10": 2477.7002, "encoder_q-layer.11": 6007.3857, "encoder_q-layer.2": 3753.4197, "encoder_q-layer.3": 3499.4949, "encoder_q-layer.4": 4051.9011, "encoder_q-layer.5": 3731.3494, "encoder_q-layer.6": 4533.0547, "encoder_q-layer.7": 4384.2451, "encoder_q-layer.8": 3899.3521, "encoder_q-layer.9": 2355.6057, "epoch": 0.28, "inbatch_neg_score": 0.1966, "inbatch_pos_score": 0.7515, "learning_rate": 3.144444444444445e-05, "loss": 3.9554, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.7278, "num_token_overlap": 11.6077, "num_token_query": 31.2135, "num_token_union": 65.0535, "num_word_context": 202.3794, "num_word_doc": 49.8057, "num_word_query": 23.1798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6451.4096, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1969, "query_norm": 1.3118, "queue_k_norm": 1.3731, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2135, "sent_len_1": 66.7278, "sent_len_max_0": 127.4275, "sent_len_max_1": 186.86, "stdk": 0.0472, "stdq": 0.0429, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9242, "doc_norm": 1.3734, "encoder_q-embeddings": 5939.5327, "encoder_q-layer.0": 4258.0088, "encoder_q-layer.1": 4409.3496, "encoder_q-layer.10": 2420.0769, "encoder_q-layer.11": 6047.2607, "encoder_q-layer.2": 4514.1963, "encoder_q-layer.3": 4594.9575, "encoder_q-layer.4": 4869.7305, "encoder_q-layer.5": 4512.5371, "encoder_q-layer.6": 4003.832, "encoder_q-layer.7": 3739.2805, "encoder_q-layer.8": 3023.6904, "encoder_q-layer.9": 2382.573, "epoch": 0.28, "inbatch_neg_score": 0.2033, "inbatch_pos_score": 0.7549, "learning_rate": 3.138888888888889e-05, "loss": 3.9242, "norm_diff": 0.0742, "norm_loss": 0.0, "num_token_doc": 66.7543, "num_token_overlap": 11.6968, "num_token_query": 31.4675, "num_token_union": 65.1282, "num_word_context": 202.0931, "num_word_doc": 49.7754, "num_word_query": 23.3714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6689.9966, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2028, "query_norm": 1.2991, "queue_k_norm": 1.3713, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4675, "sent_len_1": 66.7543, "sent_len_max_0": 127.34, "sent_len_max_1": 190.8738, "stdk": 0.047, "stdq": 0.0422, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9421, "doc_norm": 1.3693, "encoder_q-embeddings": 2333.3135, "encoder_q-layer.0": 1625.5532, "encoder_q-layer.1": 1600.6198, "encoder_q-layer.10": 2417.5349, "encoder_q-layer.11": 5952.1689, "encoder_q-layer.2": 1785.056, "encoder_q-layer.3": 1954.2163, "encoder_q-layer.4": 1938.2855, "encoder_q-layer.5": 1860.2245, "encoder_q-layer.6": 2103.8066, "encoder_q-layer.7": 2212.5962, "encoder_q-layer.8": 2448.3862, "encoder_q-layer.9": 2238.885, "epoch": 0.28, "inbatch_neg_score": 0.1994, "inbatch_pos_score": 0.7515, "learning_rate": 3.1333333333333334e-05, "loss": 3.9421, "norm_diff": 0.0863, "norm_loss": 0.0, "num_token_doc": 66.945, "num_token_overlap": 11.6984, "num_token_query": 31.3925, "num_token_union": 65.2244, "num_word_context": 202.2182, "num_word_doc": 50.0086, "num_word_query": 23.3199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3944.1005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1997, "query_norm": 1.283, "queue_k_norm": 1.3732, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3925, "sent_len_1": 66.945, "sent_len_max_0": 127.3863, "sent_len_max_1": 189.0125, "stdk": 0.0468, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9504, "doc_norm": 1.3772, "encoder_q-embeddings": 5344.355, "encoder_q-layer.0": 3585.834, "encoder_q-layer.1": 3930.6948, "encoder_q-layer.10": 2760.8022, "encoder_q-layer.11": 6685.543, "encoder_q-layer.2": 4655.2524, "encoder_q-layer.3": 4474.5923, "encoder_q-layer.4": 4486.9951, "encoder_q-layer.5": 4357.0352, "encoder_q-layer.6": 4876.6387, "encoder_q-layer.7": 4526.8555, "encoder_q-layer.8": 4039.438, "encoder_q-layer.9": 2615.762, "epoch": 0.28, "inbatch_neg_score": 0.1987, "inbatch_pos_score": 0.7466, "learning_rate": 3.1277777777777776e-05, "loss": 3.9504, "norm_diff": 0.0679, "norm_loss": 0.0, "num_token_doc": 66.6705, "num_token_overlap": 11.6348, "num_token_query": 31.2571, "num_token_union": 65.0429, "num_word_context": 202.1887, "num_word_doc": 49.7488, "num_word_query": 23.2103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6752.5227, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1992, "query_norm": 1.3092, "queue_k_norm": 1.3729, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2571, "sent_len_1": 66.6705, "sent_len_max_0": 127.5713, "sent_len_max_1": 190.1012, "stdk": 0.0471, "stdq": 0.0429, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9396, "doc_norm": 1.3753, "encoder_q-embeddings": 2932.4539, "encoder_q-layer.0": 2117.8308, "encoder_q-layer.1": 2195.6602, "encoder_q-layer.10": 2729.8311, "encoder_q-layer.11": 6710.7246, "encoder_q-layer.2": 2538.3757, "encoder_q-layer.3": 2384.1177, "encoder_q-layer.4": 2339.7053, "encoder_q-layer.5": 2213.543, "encoder_q-layer.6": 2402.345, "encoder_q-layer.7": 2528.3337, "encoder_q-layer.8": 2821.6011, "encoder_q-layer.9": 2556.437, "epoch": 0.29, "inbatch_neg_score": 0.1972, "inbatch_pos_score": 0.7471, "learning_rate": 3.1222222222222225e-05, "loss": 3.9396, "norm_diff": 0.0943, "norm_loss": 0.0, "num_token_doc": 66.9224, "num_token_overlap": 11.6721, "num_token_query": 31.3656, "num_token_union": 65.2374, "num_word_context": 202.8036, "num_word_doc": 49.9374, "num_word_query": 23.2944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4601.9841, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1975, "query_norm": 1.281, "queue_k_norm": 1.3743, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3656, "sent_len_1": 66.9224, "sent_len_max_0": 127.47, "sent_len_max_1": 190.215, "stdk": 0.047, "stdq": 0.0417, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 3.9573, "doc_norm": 1.3702, "encoder_q-embeddings": 5156.48, "encoder_q-layer.0": 3320.5298, "encoder_q-layer.1": 3449.907, "encoder_q-layer.10": 5335.3525, "encoder_q-layer.11": 13094.6826, "encoder_q-layer.2": 3994.209, "encoder_q-layer.3": 3969.7263, "encoder_q-layer.4": 4176.4805, "encoder_q-layer.5": 3894.375, "encoder_q-layer.6": 4384.7993, "encoder_q-layer.7": 4881.0479, "encoder_q-layer.8": 5612.2988, "encoder_q-layer.9": 5062.8262, "epoch": 0.29, "inbatch_neg_score": 0.1899, "inbatch_pos_score": 0.7236, "learning_rate": 3.116666666666667e-05, "loss": 3.9573, "norm_diff": 0.0711, "norm_loss": 0.0, "num_token_doc": 66.8819, "num_token_overlap": 11.7291, "num_token_query": 31.5254, "num_token_union": 65.2348, "num_word_context": 202.7136, "num_word_doc": 49.8566, "num_word_query": 23.4196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8607.7967, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1901, "query_norm": 1.2991, "queue_k_norm": 1.3729, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5254, "sent_len_1": 66.8819, "sent_len_max_0": 127.5225, "sent_len_max_1": 191.8338, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9235, "doc_norm": 1.3755, "encoder_q-embeddings": 5336.7056, "encoder_q-layer.0": 3634.6772, "encoder_q-layer.1": 3841.3062, "encoder_q-layer.10": 5293.7871, "encoder_q-layer.11": 12698.6758, "encoder_q-layer.2": 4422.3145, "encoder_q-layer.3": 4437.9141, "encoder_q-layer.4": 4312.0186, "encoder_q-layer.5": 4375.5024, "encoder_q-layer.6": 4426.915, "encoder_q-layer.7": 4869.7261, "encoder_q-layer.8": 5681.8086, "encoder_q-layer.9": 4988.6631, "epoch": 0.29, "inbatch_neg_score": 0.1914, "inbatch_pos_score": 0.7383, "learning_rate": 3.111111111111111e-05, "loss": 3.9235, "norm_diff": 0.0848, "norm_loss": 0.0, "num_token_doc": 66.9681, "num_token_overlap": 11.7254, "num_token_query": 31.4614, "num_token_union": 65.2363, "num_word_context": 202.4367, "num_word_doc": 49.9846, "num_word_query": 23.3465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8529.75, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1908, "query_norm": 1.2908, "queue_k_norm": 1.3734, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4614, "sent_len_1": 66.9681, "sent_len_max_0": 127.4163, "sent_len_max_1": 190.315, "stdk": 0.0471, "stdq": 0.0421, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9426, "doc_norm": 1.3731, "encoder_q-embeddings": 8001.542, "encoder_q-layer.0": 5553.9888, "encoder_q-layer.1": 5855.1226, "encoder_q-layer.10": 4892.2749, "encoder_q-layer.11": 12247.5049, "encoder_q-layer.2": 6851.9165, "encoder_q-layer.3": 7907.2617, "encoder_q-layer.4": 8225.5664, "encoder_q-layer.5": 9295.0166, "encoder_q-layer.6": 9668.3584, "encoder_q-layer.7": 8688.2588, "encoder_q-layer.8": 7256.4131, "encoder_q-layer.9": 4803.0815, "epoch": 0.29, "inbatch_neg_score": 0.1864, "inbatch_pos_score": 0.7256, "learning_rate": 3.105555555555555e-05, "loss": 3.9426, "norm_diff": 0.0938, "norm_loss": 0.0, "num_token_doc": 66.6028, "num_token_overlap": 11.6373, "num_token_query": 31.3439, "num_token_union": 65.0368, "num_word_context": 202.0515, "num_word_doc": 49.6987, "num_word_query": 23.2862, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11892.0193, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.187, "query_norm": 1.2793, "queue_k_norm": 1.3732, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3439, "sent_len_1": 66.6028, "sent_len_max_0": 127.6225, "sent_len_max_1": 188.585, "stdk": 0.047, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9493, "doc_norm": 1.3665, "encoder_q-embeddings": 4652.0947, "encoder_q-layer.0": 3214.748, "encoder_q-layer.1": 3310.3882, "encoder_q-layer.10": 5140.8638, "encoder_q-layer.11": 12039.9238, "encoder_q-layer.2": 3531.6479, "encoder_q-layer.3": 3600.8767, "encoder_q-layer.4": 3557.6772, "encoder_q-layer.5": 3767.1748, "encoder_q-layer.6": 4163.75, "encoder_q-layer.7": 4452.4434, "encoder_q-layer.8": 5070.1626, "encoder_q-layer.9": 4621.8721, "epoch": 0.29, "inbatch_neg_score": 0.192, "inbatch_pos_score": 0.7437, "learning_rate": 3.1e-05, "loss": 3.9493, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.6857, "num_token_overlap": 11.6463, "num_token_query": 31.2884, "num_token_union": 65.0449, "num_word_context": 202.5527, "num_word_doc": 49.7579, "num_word_query": 23.2269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7908.5607, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1904, "query_norm": 1.3153, "queue_k_norm": 1.3714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2884, "sent_len_1": 66.6857, "sent_len_max_0": 127.3988, "sent_len_max_1": 190.8875, "stdk": 0.0467, "stdq": 0.0428, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9282, "doc_norm": 1.3674, "encoder_q-embeddings": 12415.2031, "encoder_q-layer.0": 8807.5, "encoder_q-layer.1": 10093.3623, "encoder_q-layer.10": 5086.8188, "encoder_q-layer.11": 13421.7676, "encoder_q-layer.2": 10398.7402, "encoder_q-layer.3": 11608.1826, "encoder_q-layer.4": 11065.7539, "encoder_q-layer.5": 11347.4092, "encoder_q-layer.6": 10040.9775, "encoder_q-layer.7": 10088.1895, "encoder_q-layer.8": 8823.6621, "encoder_q-layer.9": 5508.0356, "epoch": 0.29, "inbatch_neg_score": 0.1893, "inbatch_pos_score": 0.731, "learning_rate": 3.094444444444445e-05, "loss": 3.9282, "norm_diff": 0.0689, "norm_loss": 0.0, "num_token_doc": 66.9039, "num_token_overlap": 11.7143, "num_token_query": 31.3502, "num_token_union": 65.1595, "num_word_context": 202.3912, "num_word_doc": 49.9439, "num_word_query": 23.278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15408.0592, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.188, "query_norm": 1.2985, "queue_k_norm": 1.3742, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3502, "sent_len_1": 66.9039, "sent_len_max_0": 127.4275, "sent_len_max_1": 188.295, "stdk": 0.0468, "stdq": 0.0422, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9407, "doc_norm": 1.366, "encoder_q-embeddings": 9425.4121, "encoder_q-layer.0": 6531.0034, "encoder_q-layer.1": 7619.4883, "encoder_q-layer.10": 4807.7524, "encoder_q-layer.11": 12565.1045, "encoder_q-layer.2": 8155.8154, "encoder_q-layer.3": 9021.7891, "encoder_q-layer.4": 8449.7988, "encoder_q-layer.5": 7517.8115, "encoder_q-layer.6": 7185.3804, "encoder_q-layer.7": 6954.1055, "encoder_q-layer.8": 6767.5542, "encoder_q-layer.9": 5001.209, "epoch": 0.29, "inbatch_neg_score": 0.1865, "inbatch_pos_score": 0.7256, "learning_rate": 3.088888888888889e-05, "loss": 3.9407, "norm_diff": 0.0687, "norm_loss": 0.0, "num_token_doc": 66.9874, "num_token_overlap": 11.6207, "num_token_query": 31.1803, "num_token_union": 65.1658, "num_word_context": 202.5648, "num_word_doc": 49.972, "num_word_query": 23.1562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12109.5025, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1868, "query_norm": 1.2973, "queue_k_norm": 1.3738, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1803, "sent_len_1": 66.9874, "sent_len_max_0": 127.5138, "sent_len_max_1": 189.9712, "stdk": 0.0468, "stdq": 0.0422, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9158, "doc_norm": 1.3745, "encoder_q-embeddings": 3120.208, "encoder_q-layer.0": 2186.4746, "encoder_q-layer.1": 2354.0405, "encoder_q-layer.10": 2456.4153, "encoder_q-layer.11": 6182.6685, "encoder_q-layer.2": 2709.926, "encoder_q-layer.3": 2892.749, "encoder_q-layer.4": 3127.3645, "encoder_q-layer.5": 3045.6687, "encoder_q-layer.6": 3087.4734, "encoder_q-layer.7": 3013.127, "encoder_q-layer.8": 2853.7544, "encoder_q-layer.9": 2452.1699, "epoch": 0.29, "inbatch_neg_score": 0.1895, "inbatch_pos_score": 0.7324, "learning_rate": 3.0833333333333335e-05, "loss": 3.9158, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.7973, "num_token_overlap": 11.6803, "num_token_query": 31.483, "num_token_union": 65.1616, "num_word_context": 202.4366, "num_word_doc": 49.8203, "num_word_query": 23.4013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4822.4065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.188, "query_norm": 1.3102, "queue_k_norm": 1.3722, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.483, "sent_len_1": 66.7973, "sent_len_max_0": 127.3912, "sent_len_max_1": 191.3812, "stdk": 0.0471, "stdq": 0.0428, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9384, "doc_norm": 1.3728, "encoder_q-embeddings": 2533.772, "encoder_q-layer.0": 1687.1108, "encoder_q-layer.1": 1791.9888, "encoder_q-layer.10": 2311.6743, "encoder_q-layer.11": 6142.2993, "encoder_q-layer.2": 2033.1184, "encoder_q-layer.3": 2097.2871, "encoder_q-layer.4": 2140.3386, "encoder_q-layer.5": 2063.3464, "encoder_q-layer.6": 2202.8523, "encoder_q-layer.7": 2388.9644, "encoder_q-layer.8": 2511.5833, "encoder_q-layer.9": 2232.5464, "epoch": 0.29, "inbatch_neg_score": 0.1916, "inbatch_pos_score": 0.748, "learning_rate": 3.077777777777778e-05, "loss": 3.9384, "norm_diff": 0.0696, "norm_loss": 0.0, "num_token_doc": 66.7577, "num_token_overlap": 11.6631, "num_token_query": 31.3462, "num_token_union": 65.0762, "num_word_context": 202.0512, "num_word_doc": 49.8688, "num_word_query": 23.2969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4141.6876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1913, "query_norm": 1.3032, "queue_k_norm": 1.3732, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3462, "sent_len_1": 66.7577, "sent_len_max_0": 127.4675, "sent_len_max_1": 188.0462, "stdk": 0.047, "stdq": 0.0424, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9375, "doc_norm": 1.3792, "encoder_q-embeddings": 7988.9497, "encoder_q-layer.0": 6153.2593, "encoder_q-layer.1": 6647.8486, "encoder_q-layer.10": 2585.0645, "encoder_q-layer.11": 6214.7207, "encoder_q-layer.2": 6868.6426, "encoder_q-layer.3": 6358.1675, "encoder_q-layer.4": 5108.54, "encoder_q-layer.5": 3660.1323, "encoder_q-layer.6": 3314.3154, "encoder_q-layer.7": 3043.4272, "encoder_q-layer.8": 2961.8899, "encoder_q-layer.9": 2489.5679, "epoch": 0.29, "inbatch_neg_score": 0.1876, "inbatch_pos_score": 0.7437, "learning_rate": 3.0722222222222227e-05, "loss": 3.9375, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.6858, "num_token_overlap": 11.6452, "num_token_query": 31.3567, "num_token_union": 65.0888, "num_word_context": 201.9523, "num_word_doc": 49.7178, "num_word_query": 23.2768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8227.7126, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1877, "query_norm": 1.2781, "queue_k_norm": 1.3747, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3567, "sent_len_1": 66.6858, "sent_len_max_0": 127.4712, "sent_len_max_1": 188.93, "stdk": 0.0473, "stdq": 0.0417, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9384, "doc_norm": 1.3679, "encoder_q-embeddings": 3632.2412, "encoder_q-layer.0": 2539.6533, "encoder_q-layer.1": 2737.6113, "encoder_q-layer.10": 2572.3594, "encoder_q-layer.11": 6138.9287, "encoder_q-layer.2": 3037.7632, "encoder_q-layer.3": 3217.6704, "encoder_q-layer.4": 3179.3516, "encoder_q-layer.5": 3146.8516, "encoder_q-layer.6": 3062.9368, "encoder_q-layer.7": 2966.5613, "encoder_q-layer.8": 2813.9194, "encoder_q-layer.9": 2408.4158, "epoch": 0.29, "inbatch_neg_score": 0.1839, "inbatch_pos_score": 0.7285, "learning_rate": 3.066666666666667e-05, "loss": 3.9384, "norm_diff": 0.0911, "norm_loss": 0.0, "num_token_doc": 66.6935, "num_token_overlap": 11.6906, "num_token_query": 31.5052, "num_token_union": 65.1328, "num_word_context": 201.9943, "num_word_doc": 49.783, "num_word_query": 23.4078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5105.3332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1851, "query_norm": 1.2767, "queue_k_norm": 1.3734, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5052, "sent_len_1": 66.6935, "sent_len_max_0": 127.6025, "sent_len_max_1": 188.3787, "stdk": 0.0469, "stdq": 0.0418, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.94, "doc_norm": 1.3712, "encoder_q-embeddings": 5822.7227, "encoder_q-layer.0": 4106.543, "encoder_q-layer.1": 4249.7695, "encoder_q-layer.10": 2888.916, "encoder_q-layer.11": 6654.04, "encoder_q-layer.2": 5293.5903, "encoder_q-layer.3": 5397.4546, "encoder_q-layer.4": 6290.1279, "encoder_q-layer.5": 5539.1411, "encoder_q-layer.6": 5553.6753, "encoder_q-layer.7": 5504.1401, "encoder_q-layer.8": 4503.0063, "encoder_q-layer.9": 2868.6992, "epoch": 0.29, "inbatch_neg_score": 0.1779, "inbatch_pos_score": 0.7075, "learning_rate": 3.061111111111111e-05, "loss": 3.94, "norm_diff": 0.1034, "norm_loss": 0.0, "num_token_doc": 66.8027, "num_token_overlap": 11.6412, "num_token_query": 31.3049, "num_token_union": 65.1508, "num_word_context": 202.4317, "num_word_doc": 49.8677, "num_word_query": 23.2637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7533.5174, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1794, "query_norm": 1.2678, "queue_k_norm": 1.3718, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3049, "sent_len_1": 66.8027, "sent_len_max_0": 127.6025, "sent_len_max_1": 189.6337, "stdk": 0.047, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9458, "doc_norm": 1.3718, "encoder_q-embeddings": 1519.5936, "encoder_q-layer.0": 1028.5142, "encoder_q-layer.1": 1146.7587, "encoder_q-layer.10": 1391.8306, "encoder_q-layer.11": 2978.3767, "encoder_q-layer.2": 1311.7479, "encoder_q-layer.3": 1422.0646, "encoder_q-layer.4": 1511.4696, "encoder_q-layer.5": 1396.8468, "encoder_q-layer.6": 1551.2908, "encoder_q-layer.7": 1625.1353, "encoder_q-layer.8": 1552.79, "encoder_q-layer.9": 1243.4746, "epoch": 0.29, "inbatch_neg_score": 0.179, "inbatch_pos_score": 0.73, "learning_rate": 3.055555555555556e-05, "loss": 3.9458, "norm_diff": 0.0815, "norm_loss": 0.0, "num_token_doc": 66.7247, "num_token_overlap": 11.6757, "num_token_query": 31.4036, "num_token_union": 65.1108, "num_word_context": 202.4364, "num_word_doc": 49.7879, "num_word_query": 23.3236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2346.0381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1799, "query_norm": 1.2902, "queue_k_norm": 1.3712, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4036, "sent_len_1": 66.7247, "sent_len_max_0": 127.3975, "sent_len_max_1": 189.6725, "stdk": 0.047, "stdq": 0.0424, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.923, "doc_norm": 1.3758, "encoder_q-embeddings": 1354.0741, "encoder_q-layer.0": 961.9074, "encoder_q-layer.1": 1057.1051, "encoder_q-layer.10": 1265.4755, "encoder_q-layer.11": 3003.2205, "encoder_q-layer.2": 1175.0845, "encoder_q-layer.3": 1158.6289, "encoder_q-layer.4": 1215.6724, "encoder_q-layer.5": 1193.5175, "encoder_q-layer.6": 1446.9283, "encoder_q-layer.7": 1502.3019, "encoder_q-layer.8": 1489.2528, "encoder_q-layer.9": 1266.391, "epoch": 0.29, "inbatch_neg_score": 0.1795, "inbatch_pos_score": 0.7295, "learning_rate": 3.05e-05, "loss": 3.923, "norm_diff": 0.083, "norm_loss": 0.0, "num_token_doc": 66.7205, "num_token_overlap": 11.6462, "num_token_query": 31.3199, "num_token_union": 65.0759, "num_word_context": 202.214, "num_word_doc": 49.809, "num_word_query": 23.2606, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2217.4697, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1798, "query_norm": 1.2928, "queue_k_norm": 1.3713, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3199, "sent_len_1": 66.7205, "sent_len_max_0": 127.5212, "sent_len_max_1": 189.3525, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9026, "doc_norm": 1.3775, "encoder_q-embeddings": 7841.8979, "encoder_q-layer.0": 6060.375, "encoder_q-layer.1": 7255.5479, "encoder_q-layer.10": 1471.0515, "encoder_q-layer.11": 3192.2266, "encoder_q-layer.2": 7287.7217, "encoder_q-layer.3": 8516.1182, "encoder_q-layer.4": 9592.1797, "encoder_q-layer.5": 6477.9849, "encoder_q-layer.6": 4497.7261, "encoder_q-layer.7": 3362.9758, "encoder_q-layer.8": 2443.4387, "encoder_q-layer.9": 1478.0642, "epoch": 0.29, "inbatch_neg_score": 0.1875, "inbatch_pos_score": 0.7349, "learning_rate": 3.044444444444445e-05, "loss": 3.9026, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 67.0903, "num_token_overlap": 11.7002, "num_token_query": 31.2794, "num_token_union": 65.2652, "num_word_context": 202.5771, "num_word_doc": 50.0412, "num_word_query": 23.2342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9054.2384, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1887, "query_norm": 1.3011, "queue_k_norm": 1.3722, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2794, "sent_len_1": 67.0903, "sent_len_max_0": 127.1188, "sent_len_max_1": 189.2775, "stdk": 0.0473, "stdq": 0.0427, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.9217, "doc_norm": 1.3707, "encoder_q-embeddings": 1897.4819, "encoder_q-layer.0": 1315.9061, "encoder_q-layer.1": 1487.4131, "encoder_q-layer.10": 1241.0735, "encoder_q-layer.11": 3077.4407, "encoder_q-layer.2": 1765.4598, "encoder_q-layer.3": 1968.5945, "encoder_q-layer.4": 1997.1183, "encoder_q-layer.5": 1892.0443, "encoder_q-layer.6": 1871.1704, "encoder_q-layer.7": 1889.0052, "encoder_q-layer.8": 1845.8298, "encoder_q-layer.9": 1348.2703, "epoch": 0.29, "inbatch_neg_score": 0.1862, "inbatch_pos_score": 0.7227, "learning_rate": 3.0388888888888887e-05, "loss": 3.9217, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.9154, "num_token_overlap": 11.6655, "num_token_query": 31.3618, "num_token_union": 65.2151, "num_word_context": 202.307, "num_word_doc": 49.9078, "num_word_query": 23.2659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2838.3206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.186, "query_norm": 1.2858, "queue_k_norm": 1.3726, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3618, "sent_len_1": 66.9154, "sent_len_max_0": 127.43, "sent_len_max_1": 191.0075, "stdk": 0.047, "stdq": 0.0422, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.9321, "doc_norm": 1.3736, "encoder_q-embeddings": 1169.3666, "encoder_q-layer.0": 821.0062, "encoder_q-layer.1": 1087.892, "encoder_q-layer.10": 594.6376, "encoder_q-layer.11": 1472.8965, "encoder_q-layer.2": 1361.4412, "encoder_q-layer.3": 1469.1953, "encoder_q-layer.4": 1450.9589, "encoder_q-layer.5": 1084.4579, "encoder_q-layer.6": 1121.1456, "encoder_q-layer.7": 1094.7883, "encoder_q-layer.8": 885.8029, "encoder_q-layer.9": 602.0385, "epoch": 0.3, "inbatch_neg_score": 0.1881, "inbatch_pos_score": 0.7407, "learning_rate": 3.0333333333333337e-05, "loss": 3.9321, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.8721, "num_token_overlap": 11.6171, "num_token_query": 31.2993, "num_token_union": 65.1675, "num_word_context": 202.3038, "num_word_doc": 49.9074, "num_word_query": 23.2459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1708.9091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1886, "query_norm": 1.2841, "queue_k_norm": 1.3706, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2993, "sent_len_1": 66.8721, "sent_len_max_0": 127.5062, "sent_len_max_1": 189.0675, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9316, "doc_norm": 1.3702, "encoder_q-embeddings": 1466.334, "encoder_q-layer.0": 969.601, "encoder_q-layer.1": 1080.1417, "encoder_q-layer.10": 582.7865, "encoder_q-layer.11": 1461.4595, "encoder_q-layer.2": 1237.0632, "encoder_q-layer.3": 1237.7239, "encoder_q-layer.4": 1245.985, "encoder_q-layer.5": 1283.2535, "encoder_q-layer.6": 1146.2017, "encoder_q-layer.7": 1105.1533, "encoder_q-layer.8": 814.6083, "encoder_q-layer.9": 599.7059, "epoch": 0.3, "inbatch_neg_score": 0.1834, "inbatch_pos_score": 0.7168, "learning_rate": 3.0277777777777776e-05, "loss": 3.9316, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.8217, "num_token_overlap": 11.7227, "num_token_query": 31.5136, "num_token_union": 65.176, "num_word_context": 202.485, "num_word_doc": 49.8485, "num_word_query": 23.4137, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1704.4107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1819, "query_norm": 1.2734, "queue_k_norm": 1.373, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5136, "sent_len_1": 66.8217, "sent_len_max_0": 127.5613, "sent_len_max_1": 189.8025, "stdk": 0.047, "stdq": 0.0418, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9349, "doc_norm": 1.3731, "encoder_q-embeddings": 4657.2017, "encoder_q-layer.0": 2888.4534, "encoder_q-layer.1": 3543.5198, "encoder_q-layer.10": 616.3366, "encoder_q-layer.11": 1469.2512, "encoder_q-layer.2": 3949.0249, "encoder_q-layer.3": 4339.7017, "encoder_q-layer.4": 4991.3662, "encoder_q-layer.5": 4501.1157, "encoder_q-layer.6": 4120.2222, "encoder_q-layer.7": 2982.2988, "encoder_q-layer.8": 1724.9576, "encoder_q-layer.9": 801.2102, "epoch": 0.3, "inbatch_neg_score": 0.182, "inbatch_pos_score": 0.73, "learning_rate": 3.0222222222222225e-05, "loss": 3.9349, "norm_diff": 0.0923, "norm_loss": 0.0, "num_token_doc": 66.8334, "num_token_overlap": 11.7012, "num_token_query": 31.3845, "num_token_union": 65.1519, "num_word_context": 202.2755, "num_word_doc": 49.8769, "num_word_query": 23.3261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5268.2235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1823, "query_norm": 1.2808, "queue_k_norm": 1.3703, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3845, "sent_len_1": 66.8334, "sent_len_max_0": 127.3213, "sent_len_max_1": 189.8587, "stdk": 0.0471, "stdq": 0.0422, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9204, "doc_norm": 1.372, "encoder_q-embeddings": 673.7921, "encoder_q-layer.0": 456.2935, "encoder_q-layer.1": 488.4761, "encoder_q-layer.10": 623.6575, "encoder_q-layer.11": 1592.4226, "encoder_q-layer.2": 537.782, "encoder_q-layer.3": 583.3336, "encoder_q-layer.4": 590.5917, "encoder_q-layer.5": 598.5745, "encoder_q-layer.6": 610.9893, "encoder_q-layer.7": 608.4494, "encoder_q-layer.8": 666.416, "encoder_q-layer.9": 597.9041, "epoch": 0.3, "inbatch_neg_score": 0.1815, "inbatch_pos_score": 0.7241, "learning_rate": 3.016666666666667e-05, "loss": 3.9204, "norm_diff": 0.0838, "norm_loss": 0.0, "num_token_doc": 66.7115, "num_token_overlap": 11.6467, "num_token_query": 31.4099, "num_token_union": 65.098, "num_word_context": 202.3051, "num_word_doc": 49.7378, "num_word_query": 23.3349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1096.384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1815, "query_norm": 1.2881, "queue_k_norm": 1.3713, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4099, "sent_len_1": 66.7115, "sent_len_max_0": 127.6275, "sent_len_max_1": 191.12, "stdk": 0.0471, "stdq": 0.0425, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9159, "doc_norm": 1.3694, "encoder_q-embeddings": 590.9874, "encoder_q-layer.0": 403.0907, "encoder_q-layer.1": 420.0121, "encoder_q-layer.10": 633.3214, "encoder_q-layer.11": 1627.8854, "encoder_q-layer.2": 474.3808, "encoder_q-layer.3": 493.326, "encoder_q-layer.4": 498.657, "encoder_q-layer.5": 513.9003, "encoder_q-layer.6": 593.1943, "encoder_q-layer.7": 632.8145, "encoder_q-layer.8": 668.6746, "encoder_q-layer.9": 609.1573, "epoch": 0.3, "inbatch_neg_score": 0.1811, "inbatch_pos_score": 0.7378, "learning_rate": 3.0111111111111113e-05, "loss": 3.9159, "norm_diff": 0.0727, "norm_loss": 0.0, "num_token_doc": 66.9452, "num_token_overlap": 11.7319, "num_token_query": 31.4323, "num_token_union": 65.2171, "num_word_context": 202.4989, "num_word_doc": 49.9708, "num_word_query": 23.3576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.5059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1801, "query_norm": 1.2967, "queue_k_norm": 1.3725, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4323, "sent_len_1": 66.9452, "sent_len_max_0": 127.5662, "sent_len_max_1": 188.1662, "stdk": 0.047, "stdq": 0.043, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.928, "doc_norm": 1.3747, "encoder_q-embeddings": 1283.8724, "encoder_q-layer.0": 890.6722, "encoder_q-layer.1": 995.762, "encoder_q-layer.10": 673.0211, "encoder_q-layer.11": 1676.2742, "encoder_q-layer.2": 1158.9417, "encoder_q-layer.3": 1201.4421, "encoder_q-layer.4": 1130.7942, "encoder_q-layer.5": 1042.7686, "encoder_q-layer.6": 1020.5016, "encoder_q-layer.7": 857.6125, "encoder_q-layer.8": 858.0591, "encoder_q-layer.9": 647.3301, "epoch": 0.3, "inbatch_neg_score": 0.1821, "inbatch_pos_score": 0.7217, "learning_rate": 3.005555555555556e-05, "loss": 3.928, "norm_diff": 0.091, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 11.7025, "num_token_query": 31.3854, "num_token_union": 65.1036, "num_word_context": 202.8091, "num_word_doc": 49.8366, "num_word_query": 23.2946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1612.1612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1818, "query_norm": 1.2837, "queue_k_norm": 1.3719, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3854, "sent_len_1": 66.8051, "sent_len_max_0": 127.3162, "sent_len_max_1": 188.6925, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9205, "doc_norm": 1.3702, "encoder_q-embeddings": 696.5145, "encoder_q-layer.0": 472.2629, "encoder_q-layer.1": 472.1032, "encoder_q-layer.10": 580.9498, "encoder_q-layer.11": 1473.6183, "encoder_q-layer.2": 537.1001, "encoder_q-layer.3": 568.0812, "encoder_q-layer.4": 624.9352, "encoder_q-layer.5": 564.1251, "encoder_q-layer.6": 591.3605, "encoder_q-layer.7": 615.3864, "encoder_q-layer.8": 668.9958, "encoder_q-layer.9": 588.743, "epoch": 0.3, "inbatch_neg_score": 0.1859, "inbatch_pos_score": 0.7251, "learning_rate": 3e-05, "loss": 3.9205, "norm_diff": 0.0842, "norm_loss": 0.0, "num_token_doc": 66.6993, "num_token_overlap": 11.6766, "num_token_query": 31.3547, "num_token_union": 65.0547, "num_word_context": 201.9767, "num_word_doc": 49.7278, "num_word_query": 23.3013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1077.8136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1851, "query_norm": 1.286, "queue_k_norm": 1.3724, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3547, "sent_len_1": 66.6993, "sent_len_max_0": 127.5138, "sent_len_max_1": 189.5825, "stdk": 0.047, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9001, "doc_norm": 1.3699, "encoder_q-embeddings": 1134.783, "encoder_q-layer.0": 819.7935, "encoder_q-layer.1": 874.5632, "encoder_q-layer.10": 596.3616, "encoder_q-layer.11": 1525.9211, "encoder_q-layer.2": 1012.5969, "encoder_q-layer.3": 1142.3455, "encoder_q-layer.4": 1093.0294, "encoder_q-layer.5": 1070.0417, "encoder_q-layer.6": 1138.2817, "encoder_q-layer.7": 1111.0986, "encoder_q-layer.8": 820.1721, "encoder_q-layer.9": 592.6554, "epoch": 0.3, "inbatch_neg_score": 0.1788, "inbatch_pos_score": 0.7192, "learning_rate": 2.9944444444444446e-05, "loss": 3.9001, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.8335, "num_token_overlap": 11.771, "num_token_query": 31.5045, "num_token_union": 65.1622, "num_word_context": 202.3381, "num_word_doc": 49.8596, "num_word_query": 23.4099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1540.1045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1777, "query_norm": 1.2746, "queue_k_norm": 1.3725, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5045, "sent_len_1": 66.8335, "sent_len_max_0": 127.4062, "sent_len_max_1": 190.98, "stdk": 0.047, "stdq": 0.0422, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9233, "doc_norm": 1.3703, "encoder_q-embeddings": 658.5154, "encoder_q-layer.0": 423.3977, "encoder_q-layer.1": 460.8629, "encoder_q-layer.10": 648.5519, "encoder_q-layer.11": 1582.2166, "encoder_q-layer.2": 518.4136, "encoder_q-layer.3": 558.4753, "encoder_q-layer.4": 560.5652, "encoder_q-layer.5": 520.2839, "encoder_q-layer.6": 590.4232, "encoder_q-layer.7": 602.3846, "encoder_q-layer.8": 670.0934, "encoder_q-layer.9": 601.0565, "epoch": 0.3, "inbatch_neg_score": 0.1799, "inbatch_pos_score": 0.7202, "learning_rate": 2.988888888888889e-05, "loss": 3.9233, "norm_diff": 0.082, "norm_loss": 0.0, "num_token_doc": 66.7059, "num_token_overlap": 11.6947, "num_token_query": 31.4811, "num_token_union": 65.126, "num_word_context": 201.9823, "num_word_doc": 49.7458, "num_word_query": 23.382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.3194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1801, "query_norm": 1.2883, "queue_k_norm": 1.3739, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4811, "sent_len_1": 66.7059, "sent_len_max_0": 127.4562, "sent_len_max_1": 191.1475, "stdk": 0.047, "stdq": 0.0426, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9145, "doc_norm": 1.37, "encoder_q-embeddings": 1187.0883, "encoder_q-layer.0": 767.1688, "encoder_q-layer.1": 888.8021, "encoder_q-layer.10": 612.0865, "encoder_q-layer.11": 1490.4049, "encoder_q-layer.2": 1088.9806, "encoder_q-layer.3": 1136.802, "encoder_q-layer.4": 1149.4846, "encoder_q-layer.5": 1268.8807, "encoder_q-layer.6": 1009.5845, "encoder_q-layer.7": 813.6848, "encoder_q-layer.8": 679.6091, "encoder_q-layer.9": 576.5599, "epoch": 0.3, "inbatch_neg_score": 0.1811, "inbatch_pos_score": 0.7153, "learning_rate": 2.9833333333333335e-05, "loss": 3.9145, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.6102, "num_token_overlap": 11.6741, "num_token_query": 31.4076, "num_token_union": 65.074, "num_word_context": 202.004, "num_word_doc": 49.7556, "num_word_query": 23.3237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1516.2718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1807, "query_norm": 1.2649, "queue_k_norm": 1.3731, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4076, "sent_len_1": 66.6102, "sent_len_max_0": 127.2725, "sent_len_max_1": 187.5888, "stdk": 0.047, "stdq": 0.0415, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9266, "doc_norm": 1.3729, "encoder_q-embeddings": 862.1232, "encoder_q-layer.0": 612.1356, "encoder_q-layer.1": 657.2311, "encoder_q-layer.10": 615.6512, "encoder_q-layer.11": 1540.6508, "encoder_q-layer.2": 744.4415, "encoder_q-layer.3": 762.6594, "encoder_q-layer.4": 760.1006, "encoder_q-layer.5": 748.9477, "encoder_q-layer.6": 823.995, "encoder_q-layer.7": 1006.0959, "encoder_q-layer.8": 934.052, "encoder_q-layer.9": 615.1808, "epoch": 0.3, "inbatch_neg_score": 0.1856, "inbatch_pos_score": 0.7231, "learning_rate": 2.9777777777777777e-05, "loss": 3.9266, "norm_diff": 0.0911, "norm_loss": 0.0, "num_token_doc": 66.6019, "num_token_overlap": 11.6574, "num_token_query": 31.3291, "num_token_union": 64.943, "num_word_context": 202.0891, "num_word_doc": 49.6995, "num_word_query": 23.2374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1280.8812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.186, "query_norm": 1.2818, "queue_k_norm": 1.3714, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3291, "sent_len_1": 66.6019, "sent_len_max_0": 127.4325, "sent_len_max_1": 188.8575, "stdk": 0.0472, "stdq": 0.0419, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 3.897, "doc_norm": 1.3738, "encoder_q-embeddings": 1028.8527, "encoder_q-layer.0": 720.4894, "encoder_q-layer.1": 796.3261, "encoder_q-layer.10": 629.6097, "encoder_q-layer.11": 1567.4482, "encoder_q-layer.2": 997.0557, "encoder_q-layer.3": 1069.2889, "encoder_q-layer.4": 1079.9675, "encoder_q-layer.5": 893.3781, "encoder_q-layer.6": 979.3508, "encoder_q-layer.7": 944.5484, "encoder_q-layer.8": 791.4214, "encoder_q-layer.9": 610.395, "epoch": 0.3, "inbatch_neg_score": 0.1825, "inbatch_pos_score": 0.7085, "learning_rate": 2.9722222222222223e-05, "loss": 3.897, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 67.0003, "num_token_overlap": 11.7242, "num_token_query": 31.5087, "num_token_union": 65.2881, "num_word_context": 202.3033, "num_word_doc": 49.9841, "num_word_query": 23.3952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1477.7498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1832, "query_norm": 1.2872, "queue_k_norm": 1.3738, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5087, "sent_len_1": 67.0003, "sent_len_max_0": 127.4287, "sent_len_max_1": 189.5838, "stdk": 0.0472, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.8953, "doc_norm": 1.3795, "encoder_q-embeddings": 809.9404, "encoder_q-layer.0": 577.3704, "encoder_q-layer.1": 586.4968, "encoder_q-layer.10": 593.905, "encoder_q-layer.11": 1463.3395, "encoder_q-layer.2": 688.6461, "encoder_q-layer.3": 687.7095, "encoder_q-layer.4": 702.0965, "encoder_q-layer.5": 667.5113, "encoder_q-layer.6": 702.1431, "encoder_q-layer.7": 700.374, "encoder_q-layer.8": 674.0057, "encoder_q-layer.9": 601.6123, "epoch": 0.3, "inbatch_neg_score": 0.1833, "inbatch_pos_score": 0.7397, "learning_rate": 2.9666666666666672e-05, "loss": 3.8953, "norm_diff": 0.0733, "norm_loss": 0.0, "num_token_doc": 66.5595, "num_token_overlap": 11.6867, "num_token_query": 31.4736, "num_token_union": 65.0451, "num_word_context": 201.9967, "num_word_doc": 49.6883, "num_word_query": 23.3863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1158.7909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1846, "query_norm": 1.3061, "queue_k_norm": 1.3728, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4736, "sent_len_1": 66.5595, "sent_len_max_0": 127.5613, "sent_len_max_1": 188.8512, "stdk": 0.0474, "stdq": 0.0428, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9181, "doc_norm": 1.3768, "encoder_q-embeddings": 803.1385, "encoder_q-layer.0": 555.9259, "encoder_q-layer.1": 631.0645, "encoder_q-layer.10": 645.3378, "encoder_q-layer.11": 1543.7231, "encoder_q-layer.2": 710.9904, "encoder_q-layer.3": 717.3536, "encoder_q-layer.4": 783.938, "encoder_q-layer.5": 777.9741, "encoder_q-layer.6": 755.2358, "encoder_q-layer.7": 796.9536, "encoder_q-layer.8": 793.9416, "encoder_q-layer.9": 622.8907, "epoch": 0.3, "inbatch_neg_score": 0.1893, "inbatch_pos_score": 0.7446, "learning_rate": 2.961111111111111e-05, "loss": 3.9181, "norm_diff": 0.0902, "norm_loss": 0.0, "num_token_doc": 66.8628, "num_token_overlap": 11.6692, "num_token_query": 31.3841, "num_token_union": 65.1445, "num_word_context": 202.3488, "num_word_doc": 49.8826, "num_word_query": 23.3095, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1217.414, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1879, "query_norm": 1.2867, "queue_k_norm": 1.374, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3841, "sent_len_1": 66.8628, "sent_len_max_0": 127.5413, "sent_len_max_1": 191.915, "stdk": 0.0473, "stdq": 0.0417, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 3.89, "doc_norm": 1.3693, "encoder_q-embeddings": 876.456, "encoder_q-layer.0": 587.3939, "encoder_q-layer.1": 662.4688, "encoder_q-layer.10": 670.9976, "encoder_q-layer.11": 1591.7109, "encoder_q-layer.2": 745.7776, "encoder_q-layer.3": 789.6934, "encoder_q-layer.4": 835.1031, "encoder_q-layer.5": 792.4101, "encoder_q-layer.6": 792.5112, "encoder_q-layer.7": 839.4236, "encoder_q-layer.8": 853.9441, "encoder_q-layer.9": 672.3687, "epoch": 0.3, "inbatch_neg_score": 0.1862, "inbatch_pos_score": 0.7222, "learning_rate": 2.955555555555556e-05, "loss": 3.89, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.9427, "num_token_overlap": 11.7034, "num_token_query": 31.374, "num_token_union": 65.2099, "num_word_context": 202.7473, "num_word_doc": 49.9577, "num_word_query": 23.3314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1275.4622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1849, "query_norm": 1.3069, "queue_k_norm": 1.3746, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.374, "sent_len_1": 66.9427, "sent_len_max_0": 127.23, "sent_len_max_1": 191.5137, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9115, "doc_norm": 1.3756, "encoder_q-embeddings": 814.5882, "encoder_q-layer.0": 544.2726, "encoder_q-layer.1": 580.7745, "encoder_q-layer.10": 654.0664, "encoder_q-layer.11": 1497.2219, "encoder_q-layer.2": 677.3099, "encoder_q-layer.3": 678.3017, "encoder_q-layer.4": 727.9441, "encoder_q-layer.5": 729.266, "encoder_q-layer.6": 790.0045, "encoder_q-layer.7": 759.025, "encoder_q-layer.8": 766.6356, "encoder_q-layer.9": 604.1594, "epoch": 0.31, "inbatch_neg_score": 0.1877, "inbatch_pos_score": 0.748, "learning_rate": 2.95e-05, "loss": 3.9115, "norm_diff": 0.0826, "norm_loss": 0.0, "num_token_doc": 66.4545, "num_token_overlap": 11.6653, "num_token_query": 31.4558, "num_token_union": 64.9727, "num_word_context": 201.9573, "num_word_doc": 49.5655, "num_word_query": 23.3798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1196.3487, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1895, "query_norm": 1.293, "queue_k_norm": 1.3737, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4558, "sent_len_1": 66.4545, "sent_len_max_0": 127.4213, "sent_len_max_1": 191.4137, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9138, "doc_norm": 1.3767, "encoder_q-embeddings": 874.7838, "encoder_q-layer.0": 656.7512, "encoder_q-layer.1": 658.6874, "encoder_q-layer.10": 662.7368, "encoder_q-layer.11": 1592.5098, "encoder_q-layer.2": 703.783, "encoder_q-layer.3": 735.4482, "encoder_q-layer.4": 733.2722, "encoder_q-layer.5": 685.8511, "encoder_q-layer.6": 676.76, "encoder_q-layer.7": 715.102, "encoder_q-layer.8": 768.2914, "encoder_q-layer.9": 640.2672, "epoch": 0.31, "inbatch_neg_score": 0.1908, "inbatch_pos_score": 0.7271, "learning_rate": 2.9444444444444448e-05, "loss": 3.9138, "norm_diff": 0.0769, "norm_loss": 0.0, "num_token_doc": 67.0354, "num_token_overlap": 11.6984, "num_token_query": 31.3434, "num_token_union": 65.2016, "num_word_context": 202.1053, "num_word_doc": 50.021, "num_word_query": 23.2683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1247.42, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1886, "query_norm": 1.2997, "queue_k_norm": 1.3735, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3434, "sent_len_1": 67.0354, "sent_len_max_0": 127.4013, "sent_len_max_1": 190.7713, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8857, "doc_norm": 1.3714, "encoder_q-embeddings": 617.7543, "encoder_q-layer.0": 426.57, "encoder_q-layer.1": 445.5219, "encoder_q-layer.10": 644.0634, "encoder_q-layer.11": 1554.0212, "encoder_q-layer.2": 499.2118, "encoder_q-layer.3": 490.1581, "encoder_q-layer.4": 523.5333, "encoder_q-layer.5": 519.2647, "encoder_q-layer.6": 574.9221, "encoder_q-layer.7": 667.4252, "encoder_q-layer.8": 677.8547, "encoder_q-layer.9": 595.2304, "epoch": 0.31, "inbatch_neg_score": 0.1921, "inbatch_pos_score": 0.7383, "learning_rate": 2.9388888888888887e-05, "loss": 3.8857, "norm_diff": 0.0739, "norm_loss": 0.0, "num_token_doc": 66.8931, "num_token_overlap": 11.7015, "num_token_query": 31.4922, "num_token_union": 65.2789, "num_word_context": 202.3791, "num_word_doc": 49.9257, "num_word_query": 23.4065, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1054.2915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1917, "query_norm": 1.2975, "queue_k_norm": 1.3743, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4922, "sent_len_1": 66.8931, "sent_len_max_0": 127.575, "sent_len_max_1": 190.9462, "stdk": 0.0471, "stdq": 0.0422, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.929, "doc_norm": 1.3647, "encoder_q-embeddings": 834.2761, "encoder_q-layer.0": 548.3947, "encoder_q-layer.1": 633.4585, "encoder_q-layer.10": 634.9694, "encoder_q-layer.11": 1535.3408, "encoder_q-layer.2": 746.321, "encoder_q-layer.3": 710.0885, "encoder_q-layer.4": 793.3361, "encoder_q-layer.5": 703.616, "encoder_q-layer.6": 743.1019, "encoder_q-layer.7": 750.4095, "encoder_q-layer.8": 727.5876, "encoder_q-layer.9": 607.2001, "epoch": 0.31, "inbatch_neg_score": 0.2028, "inbatch_pos_score": 0.7344, "learning_rate": 2.9333333333333336e-05, "loss": 3.929, "norm_diff": 0.0562, "norm_loss": 0.0, "num_token_doc": 66.6034, "num_token_overlap": 11.6235, "num_token_query": 31.2454, "num_token_union": 64.9473, "num_word_context": 201.9589, "num_word_doc": 49.7015, "num_word_query": 23.1863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.8862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2013, "query_norm": 1.3086, "queue_k_norm": 1.373, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2454, "sent_len_1": 66.6034, "sent_len_max_0": 127.37, "sent_len_max_1": 188.6987, "stdk": 0.0468, "stdq": 0.0425, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.9055, "doc_norm": 1.3784, "encoder_q-embeddings": 1211.5529, "encoder_q-layer.0": 916.3976, "encoder_q-layer.1": 927.8478, "encoder_q-layer.10": 595.6375, "encoder_q-layer.11": 1479.2336, "encoder_q-layer.2": 1044.109, "encoder_q-layer.3": 1024.2598, "encoder_q-layer.4": 1083.4147, "encoder_q-layer.5": 997.607, "encoder_q-layer.6": 971.5862, "encoder_q-layer.7": 938.5065, "encoder_q-layer.8": 789.6735, "encoder_q-layer.9": 598.3827, "epoch": 0.31, "inbatch_neg_score": 0.2045, "inbatch_pos_score": 0.7339, "learning_rate": 2.927777777777778e-05, "loss": 3.9055, "norm_diff": 0.0888, "norm_loss": 0.0, "num_token_doc": 66.602, "num_token_overlap": 11.7063, "num_token_query": 31.4161, "num_token_union": 65.0443, "num_word_context": 202.1876, "num_word_doc": 49.7578, "num_word_query": 23.3372, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1520.0177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2036, "query_norm": 1.2896, "queue_k_norm": 1.3744, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4161, "sent_len_1": 66.602, "sent_len_max_0": 127.625, "sent_len_max_1": 186.3875, "stdk": 0.0473, "stdq": 0.0419, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9114, "doc_norm": 1.3726, "encoder_q-embeddings": 1722.7001, "encoder_q-layer.0": 1184.8843, "encoder_q-layer.1": 1327.7466, "encoder_q-layer.10": 1205.8102, "encoder_q-layer.11": 3006.3669, "encoder_q-layer.2": 1591.03, "encoder_q-layer.3": 1737.4652, "encoder_q-layer.4": 1745.1169, "encoder_q-layer.5": 1748.2316, "encoder_q-layer.6": 1801.2366, "encoder_q-layer.7": 1912.6597, "encoder_q-layer.8": 1627.8536, "encoder_q-layer.9": 1221.3765, "epoch": 0.31, "inbatch_neg_score": 0.2035, "inbatch_pos_score": 0.751, "learning_rate": 2.9222222222222224e-05, "loss": 3.9114, "norm_diff": 0.0703, "norm_loss": 0.0, "num_token_doc": 66.7972, "num_token_overlap": 11.6782, "num_token_query": 31.3779, "num_token_union": 65.132, "num_word_context": 202.1045, "num_word_doc": 49.8495, "num_word_query": 23.3077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2588.1014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2041, "query_norm": 1.3024, "queue_k_norm": 1.3761, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3779, "sent_len_1": 66.7972, "sent_len_max_0": 127.4575, "sent_len_max_1": 188.42, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 3.9193, "doc_norm": 1.3704, "encoder_q-embeddings": 1263.5664, "encoder_q-layer.0": 825.5595, "encoder_q-layer.1": 832.7988, "encoder_q-layer.10": 1263.1196, "encoder_q-layer.11": 3038.0942, "encoder_q-layer.2": 952.3757, "encoder_q-layer.3": 981.5449, "encoder_q-layer.4": 1016.3624, "encoder_q-layer.5": 990.1917, "encoder_q-layer.6": 1089.2953, "encoder_q-layer.7": 1216.9856, "encoder_q-layer.8": 1253.7802, "encoder_q-layer.9": 1159.9684, "epoch": 0.31, "inbatch_neg_score": 0.2053, "inbatch_pos_score": 0.7305, "learning_rate": 2.916666666666667e-05, "loss": 3.9193, "norm_diff": 0.0902, "norm_loss": 0.0, "num_token_doc": 66.7637, "num_token_overlap": 11.6449, "num_token_query": 31.2067, "num_token_union": 65.0259, "num_word_context": 202.0134, "num_word_doc": 49.8517, "num_word_query": 23.1597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2063.9669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2043, "query_norm": 1.2803, "queue_k_norm": 1.3761, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2067, "sent_len_1": 66.7637, "sent_len_max_0": 127.32, "sent_len_max_1": 188.4137, "stdk": 0.047, "stdq": 0.0415, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9244, "doc_norm": 1.374, "encoder_q-embeddings": 1380.2451, "encoder_q-layer.0": 918.2878, "encoder_q-layer.1": 986.297, "encoder_q-layer.10": 1294.578, "encoder_q-layer.11": 2863.5344, "encoder_q-layer.2": 1161.5431, "encoder_q-layer.3": 1148.7897, "encoder_q-layer.4": 1161.0997, "encoder_q-layer.5": 1044.196, "encoder_q-layer.6": 1106.4893, "encoder_q-layer.7": 1124.7368, "encoder_q-layer.8": 1275.7306, "encoder_q-layer.9": 1162.8438, "epoch": 0.31, "inbatch_neg_score": 0.2074, "inbatch_pos_score": 0.7534, "learning_rate": 2.9111111111111112e-05, "loss": 3.9244, "norm_diff": 0.0813, "norm_loss": 0.0, "num_token_doc": 66.5983, "num_token_overlap": 11.6818, "num_token_query": 31.4318, "num_token_union": 65.0358, "num_word_context": 202.0368, "num_word_doc": 49.72, "num_word_query": 23.3309, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.6736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.208, "query_norm": 1.2926, "queue_k_norm": 1.3752, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4318, "sent_len_1": 66.5983, "sent_len_max_0": 127.5275, "sent_len_max_1": 189.1163, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 3.8935, "doc_norm": 1.3785, "encoder_q-embeddings": 1769.9263, "encoder_q-layer.0": 1321.3787, "encoder_q-layer.1": 1260.3907, "encoder_q-layer.10": 1278.3677, "encoder_q-layer.11": 3010.8369, "encoder_q-layer.2": 1348.2803, "encoder_q-layer.3": 1254.3685, "encoder_q-layer.4": 1233.7375, "encoder_q-layer.5": 1210.4052, "encoder_q-layer.6": 1210.5618, "encoder_q-layer.7": 1281.8118, "encoder_q-layer.8": 1474.7817, "encoder_q-layer.9": 1254.4354, "epoch": 0.31, "inbatch_neg_score": 0.2099, "inbatch_pos_score": 0.7393, "learning_rate": 2.9055555555555558e-05, "loss": 3.8935, "norm_diff": 0.0702, "norm_loss": 0.0, "num_token_doc": 66.7331, "num_token_overlap": 11.6555, "num_token_query": 31.3845, "num_token_union": 65.1484, "num_word_context": 202.0986, "num_word_doc": 49.7891, "num_word_query": 23.3039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2320.9098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2092, "query_norm": 1.3084, "queue_k_norm": 1.3783, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3845, "sent_len_1": 66.7331, "sent_len_max_0": 127.4912, "sent_len_max_1": 189.6825, "stdk": 0.0472, "stdq": 0.0427, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9045, "doc_norm": 1.376, "encoder_q-embeddings": 1141.7072, "encoder_q-layer.0": 756.8622, "encoder_q-layer.1": 802.8408, "encoder_q-layer.10": 1204.8157, "encoder_q-layer.11": 2935.4973, "encoder_q-layer.2": 949.9217, "encoder_q-layer.3": 953.8372, "encoder_q-layer.4": 975.0781, "encoder_q-layer.5": 989.3013, "encoder_q-layer.6": 1100.9446, "encoder_q-layer.7": 1226.6118, "encoder_q-layer.8": 1353.344, "encoder_q-layer.9": 1142.7759, "epoch": 0.31, "inbatch_neg_score": 0.2101, "inbatch_pos_score": 0.7583, "learning_rate": 2.9e-05, "loss": 3.9045, "norm_diff": 0.0819, "norm_loss": 0.0, "num_token_doc": 66.7367, "num_token_overlap": 11.6889, "num_token_query": 31.4187, "num_token_union": 65.1167, "num_word_context": 202.0578, "num_word_doc": 49.7964, "num_word_query": 23.3288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2016.5712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2106, "query_norm": 1.2941, "queue_k_norm": 1.3795, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4187, "sent_len_1": 66.7367, "sent_len_max_0": 127.5563, "sent_len_max_1": 190.3862, "stdk": 0.0471, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9005, "doc_norm": 1.3809, "encoder_q-embeddings": 2369.9502, "encoder_q-layer.0": 1621.8325, "encoder_q-layer.1": 1771.022, "encoder_q-layer.10": 1266.3145, "encoder_q-layer.11": 3076.5754, "encoder_q-layer.2": 2348.1748, "encoder_q-layer.3": 2241.7708, "encoder_q-layer.4": 2297.7251, "encoder_q-layer.5": 2236.4185, "encoder_q-layer.6": 2144.6899, "encoder_q-layer.7": 1929.2477, "encoder_q-layer.8": 1494.2898, "encoder_q-layer.9": 1229.8051, "epoch": 0.31, "inbatch_neg_score": 0.2094, "inbatch_pos_score": 0.752, "learning_rate": 2.8944444444444446e-05, "loss": 3.9005, "norm_diff": 0.089, "norm_loss": 0.0, "num_token_doc": 66.7032, "num_token_overlap": 11.6917, "num_token_query": 31.463, "num_token_union": 65.1483, "num_word_context": 202.3223, "num_word_doc": 49.7543, "num_word_query": 23.3792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3133.4196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2089, "query_norm": 1.2919, "queue_k_norm": 1.377, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.463, "sent_len_1": 66.7032, "sent_len_max_0": 127.39, "sent_len_max_1": 189.9225, "stdk": 0.0472, "stdq": 0.0422, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8991, "doc_norm": 1.3685, "encoder_q-embeddings": 2690.3608, "encoder_q-layer.0": 1780.6022, "encoder_q-layer.1": 1891.6554, "encoder_q-layer.10": 1232.3646, "encoder_q-layer.11": 3127.8132, "encoder_q-layer.2": 2251.6572, "encoder_q-layer.3": 2506.5798, "encoder_q-layer.4": 2672.811, "encoder_q-layer.5": 2651.3662, "encoder_q-layer.6": 2941.7996, "encoder_q-layer.7": 2455.1895, "encoder_q-layer.8": 1925.2527, "encoder_q-layer.9": 1303.8898, "epoch": 0.31, "inbatch_neg_score": 0.2149, "inbatch_pos_score": 0.769, "learning_rate": 2.8888888888888888e-05, "loss": 3.8991, "norm_diff": 0.0616, "norm_loss": 0.0, "num_token_doc": 66.9229, "num_token_overlap": 11.6812, "num_token_query": 31.4573, "num_token_union": 65.2608, "num_word_context": 202.6689, "num_word_doc": 49.9523, "num_word_query": 23.3632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3530.5711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2146, "query_norm": 1.3069, "queue_k_norm": 1.3782, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4573, "sent_len_1": 66.9229, "sent_len_max_0": 127.4988, "sent_len_max_1": 189.5375, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9062, "doc_norm": 1.3754, "encoder_q-embeddings": 1876.8699, "encoder_q-layer.0": 1269.6992, "encoder_q-layer.1": 1341.098, "encoder_q-layer.10": 1355.7573, "encoder_q-layer.11": 3112.0337, "encoder_q-layer.2": 1470.1744, "encoder_q-layer.3": 1602.3268, "encoder_q-layer.4": 1761.3062, "encoder_q-layer.5": 1487.4304, "encoder_q-layer.6": 1461.2065, "encoder_q-layer.7": 1382.2881, "encoder_q-layer.8": 1400.2186, "encoder_q-layer.9": 1240.75, "epoch": 0.31, "inbatch_neg_score": 0.2061, "inbatch_pos_score": 0.7427, "learning_rate": 2.8833333333333334e-05, "loss": 3.9062, "norm_diff": 0.0706, "norm_loss": 0.0, "num_token_doc": 66.4917, "num_token_overlap": 11.6211, "num_token_query": 31.3359, "num_token_union": 64.9435, "num_word_context": 201.9289, "num_word_doc": 49.5883, "num_word_query": 23.2456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2532.537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2078, "query_norm": 1.3048, "queue_k_norm": 1.3788, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3359, "sent_len_1": 66.4917, "sent_len_max_0": 127.5263, "sent_len_max_1": 189.5588, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9072, "doc_norm": 1.3819, "encoder_q-embeddings": 1491.2911, "encoder_q-layer.0": 1001.0493, "encoder_q-layer.1": 1136.1655, "encoder_q-layer.10": 1238.4401, "encoder_q-layer.11": 3134.51, "encoder_q-layer.2": 1326.8384, "encoder_q-layer.3": 1417.2462, "encoder_q-layer.4": 1467.6202, "encoder_q-layer.5": 1391.0826, "encoder_q-layer.6": 1551.0371, "encoder_q-layer.7": 1439.7661, "encoder_q-layer.8": 1561.5347, "encoder_q-layer.9": 1232.0005, "epoch": 0.31, "inbatch_neg_score": 0.2072, "inbatch_pos_score": 0.7622, "learning_rate": 2.877777777777778e-05, "loss": 3.9072, "norm_diff": 0.0783, "norm_loss": 0.0, "num_token_doc": 66.6007, "num_token_overlap": 11.6676, "num_token_query": 31.3049, "num_token_union": 64.9817, "num_word_context": 202.0455, "num_word_doc": 49.7043, "num_word_query": 23.244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2379.0725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2063, "query_norm": 1.3037, "queue_k_norm": 1.3782, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3049, "sent_len_1": 66.6007, "sent_len_max_0": 127.4613, "sent_len_max_1": 187.045, "stdk": 0.0472, "stdq": 0.0429, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.8987, "doc_norm": 1.3827, "encoder_q-embeddings": 1198.5906, "encoder_q-layer.0": 796.9937, "encoder_q-layer.1": 837.572, "encoder_q-layer.10": 1256.9219, "encoder_q-layer.11": 3096.3701, "encoder_q-layer.2": 987.4385, "encoder_q-layer.3": 1061.0646, "encoder_q-layer.4": 1100.3217, "encoder_q-layer.5": 1028.4921, "encoder_q-layer.6": 1135.6082, "encoder_q-layer.7": 1163.8687, "encoder_q-layer.8": 1326.6171, "encoder_q-layer.9": 1278.9785, "epoch": 0.31, "inbatch_neg_score": 0.201, "inbatch_pos_score": 0.75, "learning_rate": 2.8722222222222222e-05, "loss": 3.8987, "norm_diff": 0.1021, "norm_loss": 0.0, "num_token_doc": 66.9093, "num_token_overlap": 11.659, "num_token_query": 31.341, "num_token_union": 65.2582, "num_word_context": 202.4315, "num_word_doc": 49.9446, "num_word_query": 23.2633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2095.9027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2015, "query_norm": 1.2806, "queue_k_norm": 1.3816, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.341, "sent_len_1": 66.9093, "sent_len_max_0": 127.605, "sent_len_max_1": 188.08, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8814, "doc_norm": 1.3774, "encoder_q-embeddings": 1078.2828, "encoder_q-layer.0": 748.4369, "encoder_q-layer.1": 759.9576, "encoder_q-layer.10": 1204.9332, "encoder_q-layer.11": 2912.3342, "encoder_q-layer.2": 845.078, "encoder_q-layer.3": 893.347, "encoder_q-layer.4": 922.2833, "encoder_q-layer.5": 914.9623, "encoder_q-layer.6": 992.6001, "encoder_q-layer.7": 1088.6361, "encoder_q-layer.8": 1241.9976, "encoder_q-layer.9": 1096.9229, "epoch": 0.32, "inbatch_neg_score": 0.1993, "inbatch_pos_score": 0.7471, "learning_rate": 2.8666666666666668e-05, "loss": 3.8814, "norm_diff": 0.1077, "norm_loss": 0.0, "num_token_doc": 66.8405, "num_token_overlap": 11.7462, "num_token_query": 31.59, "num_token_union": 65.2501, "num_word_context": 202.3432, "num_word_doc": 49.8688, "num_word_query": 23.456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1921.7025, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1985, "query_norm": 1.2697, "queue_k_norm": 1.3804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.59, "sent_len_1": 66.8405, "sent_len_max_0": 127.6112, "sent_len_max_1": 189.3512, "stdk": 0.0471, "stdq": 0.0417, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8928, "doc_norm": 1.3853, "encoder_q-embeddings": 1835.0989, "encoder_q-layer.0": 1209.3575, "encoder_q-layer.1": 1286.1993, "encoder_q-layer.10": 1693.6547, "encoder_q-layer.11": 3583.8025, "encoder_q-layer.2": 1425.3396, "encoder_q-layer.3": 1436.3951, "encoder_q-layer.4": 1476.4204, "encoder_q-layer.5": 1391.0835, "encoder_q-layer.6": 1328.9315, "encoder_q-layer.7": 1415.9237, "encoder_q-layer.8": 1767.6771, "encoder_q-layer.9": 1674.1904, "epoch": 0.32, "inbatch_neg_score": 0.1917, "inbatch_pos_score": 0.7603, "learning_rate": 2.861111111111111e-05, "loss": 3.8928, "norm_diff": 0.0564, "norm_loss": 0.0, "num_token_doc": 66.5906, "num_token_overlap": 11.6204, "num_token_query": 31.4038, "num_token_union": 65.103, "num_word_context": 202.0777, "num_word_doc": 49.7369, "num_word_query": 23.3238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2626.0207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1917, "query_norm": 1.329, "queue_k_norm": 1.3803, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4038, "sent_len_1": 66.5906, "sent_len_max_0": 127.395, "sent_len_max_1": 188.1087, "stdk": 0.0474, "stdq": 0.0441, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8991, "doc_norm": 1.3784, "encoder_q-embeddings": 1199.2042, "encoder_q-layer.0": 811.4059, "encoder_q-layer.1": 857.2703, "encoder_q-layer.10": 1177.3878, "encoder_q-layer.11": 3116.0322, "encoder_q-layer.2": 963.2801, "encoder_q-layer.3": 983.0922, "encoder_q-layer.4": 1030.9857, "encoder_q-layer.5": 1068.9495, "encoder_q-layer.6": 1106.3945, "encoder_q-layer.7": 1154.8517, "encoder_q-layer.8": 1285.4053, "encoder_q-layer.9": 1122.9393, "epoch": 0.32, "inbatch_neg_score": 0.19, "inbatch_pos_score": 0.7197, "learning_rate": 2.855555555555556e-05, "loss": 3.8991, "norm_diff": 0.1029, "norm_loss": 0.0, "num_token_doc": 66.7236, "num_token_overlap": 11.7121, "num_token_query": 31.5436, "num_token_union": 65.1727, "num_word_context": 202.1669, "num_word_doc": 49.7885, "num_word_query": 23.4245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.6251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1896, "query_norm": 1.2755, "queue_k_norm": 1.3813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5436, "sent_len_1": 66.7236, "sent_len_max_0": 127.6425, "sent_len_max_1": 189.2125, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8973, "doc_norm": 1.3939, "encoder_q-embeddings": 2045.0354, "encoder_q-layer.0": 1518.7815, "encoder_q-layer.1": 1610.2162, "encoder_q-layer.10": 1285.1656, "encoder_q-layer.11": 3015.5876, "encoder_q-layer.2": 1949.2843, "encoder_q-layer.3": 1940.3768, "encoder_q-layer.4": 1890.8574, "encoder_q-layer.5": 1983.7751, "encoder_q-layer.6": 1915.5802, "encoder_q-layer.7": 1720.9825, "encoder_q-layer.8": 1515.1737, "encoder_q-layer.9": 1198.7164, "epoch": 0.32, "inbatch_neg_score": 0.18, "inbatch_pos_score": 0.7476, "learning_rate": 2.8499999999999998e-05, "loss": 3.8973, "norm_diff": 0.1203, "norm_loss": 0.0, "num_token_doc": 66.8367, "num_token_overlap": 11.6706, "num_token_query": 31.3345, "num_token_union": 65.1593, "num_word_context": 202.7266, "num_word_doc": 49.875, "num_word_query": 23.2741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2818.0197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1821, "query_norm": 1.2736, "queue_k_norm": 1.3813, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3345, "sent_len_1": 66.8367, "sent_len_max_0": 127.6225, "sent_len_max_1": 189.12, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 3.8933, "doc_norm": 1.3769, "encoder_q-embeddings": 1394.224, "encoder_q-layer.0": 927.8925, "encoder_q-layer.1": 987.3403, "encoder_q-layer.10": 1253.4047, "encoder_q-layer.11": 3024.7214, "encoder_q-layer.2": 1046.6732, "encoder_q-layer.3": 1085.3866, "encoder_q-layer.4": 1093.2214, "encoder_q-layer.5": 1090.1823, "encoder_q-layer.6": 1172.3812, "encoder_q-layer.7": 1261.5297, "encoder_q-layer.8": 1342.0889, "encoder_q-layer.9": 1219.7191, "epoch": 0.32, "inbatch_neg_score": 0.1816, "inbatch_pos_score": 0.7324, "learning_rate": 2.8444444444444447e-05, "loss": 3.8933, "norm_diff": 0.0841, "norm_loss": 0.0, "num_token_doc": 66.7557, "num_token_overlap": 11.6311, "num_token_query": 31.1994, "num_token_union": 65.0442, "num_word_context": 202.4249, "num_word_doc": 49.8148, "num_word_query": 23.167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2152.9645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1814, "query_norm": 1.2928, "queue_k_norm": 1.3807, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1994, "sent_len_1": 66.7557, "sent_len_max_0": 127.5162, "sent_len_max_1": 188.2038, "stdk": 0.0471, "stdq": 0.0428, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.8948, "doc_norm": 1.375, "encoder_q-embeddings": 1158.6973, "encoder_q-layer.0": 776.267, "encoder_q-layer.1": 778.272, "encoder_q-layer.10": 1355.0597, "encoder_q-layer.11": 3186.8997, "encoder_q-layer.2": 876.4188, "encoder_q-layer.3": 910.3109, "encoder_q-layer.4": 911.823, "encoder_q-layer.5": 950.1988, "encoder_q-layer.6": 1123.3403, "encoder_q-layer.7": 1187.2622, "encoder_q-layer.8": 1427.1479, "encoder_q-layer.9": 1326.5082, "epoch": 0.32, "inbatch_neg_score": 0.1867, "inbatch_pos_score": 0.7285, "learning_rate": 2.8388888888888893e-05, "loss": 3.8948, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.6543, "num_token_overlap": 11.6682, "num_token_query": 31.3658, "num_token_union": 65.0286, "num_word_context": 202.3252, "num_word_doc": 49.7048, "num_word_query": 23.2926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.1357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1857, "query_norm": 1.2954, "queue_k_norm": 1.3813, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3658, "sent_len_1": 66.6543, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.8963, "stdk": 0.0471, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.8976, "doc_norm": 1.3794, "encoder_q-embeddings": 1508.811, "encoder_q-layer.0": 1049.8732, "encoder_q-layer.1": 1174.2477, "encoder_q-layer.10": 1144.168, "encoder_q-layer.11": 2942.6606, "encoder_q-layer.2": 1320.5986, "encoder_q-layer.3": 1365.0944, "encoder_q-layer.4": 1427.8967, "encoder_q-layer.5": 1345.7576, "encoder_q-layer.6": 1411.7407, "encoder_q-layer.7": 1384.2874, "encoder_q-layer.8": 1373.3647, "encoder_q-layer.9": 1148.9651, "epoch": 0.32, "inbatch_neg_score": 0.1819, "inbatch_pos_score": 0.7271, "learning_rate": 2.8333333333333335e-05, "loss": 3.8976, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.8836, "num_token_overlap": 11.6852, "num_token_query": 31.3633, "num_token_union": 65.1023, "num_word_context": 201.9592, "num_word_doc": 49.8392, "num_word_query": 23.2771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2279.0121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1824, "query_norm": 1.2822, "queue_k_norm": 1.3806, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3633, "sent_len_1": 66.8836, "sent_len_max_0": 127.45, "sent_len_max_1": 191.5813, "stdk": 0.0473, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.888, "doc_norm": 1.3719, "encoder_q-embeddings": 1198.9033, "encoder_q-layer.0": 783.201, "encoder_q-layer.1": 832.4889, "encoder_q-layer.10": 1263.1283, "encoder_q-layer.11": 3055.0117, "encoder_q-layer.2": 928.4197, "encoder_q-layer.3": 957.0359, "encoder_q-layer.4": 1000.8935, "encoder_q-layer.5": 971.8503, "encoder_q-layer.6": 1036.8608, "encoder_q-layer.7": 1139.1448, "encoder_q-layer.8": 1282.8085, "encoder_q-layer.9": 1204.6692, "epoch": 0.32, "inbatch_neg_score": 0.1863, "inbatch_pos_score": 0.7314, "learning_rate": 2.827777777777778e-05, "loss": 3.888, "norm_diff": 0.1013, "norm_loss": 0.0, "num_token_doc": 66.6362, "num_token_overlap": 11.6902, "num_token_query": 31.5479, "num_token_union": 65.158, "num_word_context": 202.4595, "num_word_doc": 49.7436, "num_word_query": 23.4515, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1989.9089, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1868, "query_norm": 1.2707, "queue_k_norm": 1.3795, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5479, "sent_len_1": 66.6362, "sent_len_max_0": 127.4188, "sent_len_max_1": 190.1238, "stdk": 0.047, "stdq": 0.0417, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8768, "doc_norm": 1.3843, "encoder_q-embeddings": 1661.1676, "encoder_q-layer.0": 1132.6487, "encoder_q-layer.1": 1243.1295, "encoder_q-layer.10": 1203.6842, "encoder_q-layer.11": 3048.4058, "encoder_q-layer.2": 1438.7462, "encoder_q-layer.3": 1551.2686, "encoder_q-layer.4": 1568.2692, "encoder_q-layer.5": 1607.854, "encoder_q-layer.6": 1805.1941, "encoder_q-layer.7": 1890.431, "encoder_q-layer.8": 1718.4724, "encoder_q-layer.9": 1205.2246, "epoch": 0.32, "inbatch_neg_score": 0.1852, "inbatch_pos_score": 0.7456, "learning_rate": 2.8222222222222223e-05, "loss": 3.8768, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 67.026, "num_token_overlap": 11.76, "num_token_query": 31.5065, "num_token_union": 65.3028, "num_word_context": 202.6333, "num_word_doc": 50.041, "num_word_query": 23.3996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2558.6387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1863, "query_norm": 1.2892, "queue_k_norm": 1.3801, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5065, "sent_len_1": 67.026, "sent_len_max_0": 127.5425, "sent_len_max_1": 190.135, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8887, "doc_norm": 1.3813, "encoder_q-embeddings": 1303.5764, "encoder_q-layer.0": 858.5438, "encoder_q-layer.1": 929.1498, "encoder_q-layer.10": 1229.859, "encoder_q-layer.11": 3018.5168, "encoder_q-layer.2": 1028.9055, "encoder_q-layer.3": 1067.406, "encoder_q-layer.4": 1161.1744, "encoder_q-layer.5": 1108.1952, "encoder_q-layer.6": 1259.7573, "encoder_q-layer.7": 1295.4062, "encoder_q-layer.8": 1371.4332, "encoder_q-layer.9": 1198.3774, "epoch": 0.32, "inbatch_neg_score": 0.19, "inbatch_pos_score": 0.7588, "learning_rate": 2.816666666666667e-05, "loss": 3.8887, "norm_diff": 0.0745, "norm_loss": 0.0, "num_token_doc": 66.8691, "num_token_overlap": 11.6749, "num_token_query": 31.4138, "num_token_union": 65.209, "num_word_context": 202.3688, "num_word_doc": 49.9027, "num_word_query": 23.3222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.2412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.191, "query_norm": 1.3068, "queue_k_norm": 1.3811, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4138, "sent_len_1": 66.8691, "sent_len_max_0": 127.5512, "sent_len_max_1": 189.3512, "stdk": 0.0473, "stdq": 0.043, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.8816, "doc_norm": 1.3729, "encoder_q-embeddings": 2460.5962, "encoder_q-layer.0": 1563.3051, "encoder_q-layer.1": 1696.8199, "encoder_q-layer.10": 2474.7761, "encoder_q-layer.11": 5837.6953, "encoder_q-layer.2": 1967.5659, "encoder_q-layer.3": 2009.7672, "encoder_q-layer.4": 2026.1208, "encoder_q-layer.5": 2075.2166, "encoder_q-layer.6": 2261.8855, "encoder_q-layer.7": 2527.6685, "encoder_q-layer.8": 2752.8462, "encoder_q-layer.9": 2401.4761, "epoch": 0.32, "inbatch_neg_score": 0.193, "inbatch_pos_score": 0.7266, "learning_rate": 2.811111111111111e-05, "loss": 3.8816, "norm_diff": 0.0739, "norm_loss": 0.0, "num_token_doc": 66.9686, "num_token_overlap": 11.627, "num_token_query": 31.2731, "num_token_union": 65.253, "num_word_context": 202.9383, "num_word_doc": 49.9587, "num_word_query": 23.224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4037.8238, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1917, "query_norm": 1.299, "queue_k_norm": 1.3809, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2731, "sent_len_1": 66.9686, "sent_len_max_0": 127.3688, "sent_len_max_1": 189.1987, "stdk": 0.047, "stdq": 0.0426, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.863, "doc_norm": 1.3729, "encoder_q-embeddings": 2501.8481, "encoder_q-layer.0": 1636.3204, "encoder_q-layer.1": 1758.7875, "encoder_q-layer.10": 2402.3811, "encoder_q-layer.11": 5825.8291, "encoder_q-layer.2": 2035.173, "encoder_q-layer.3": 2184.4858, "encoder_q-layer.4": 2266.5203, "encoder_q-layer.5": 2115.7219, "encoder_q-layer.6": 2301.6125, "encoder_q-layer.7": 2393.4456, "encoder_q-layer.8": 2674.0796, "encoder_q-layer.9": 2349.6775, "epoch": 0.32, "inbatch_neg_score": 0.1929, "inbatch_pos_score": 0.7354, "learning_rate": 2.8055555555555557e-05, "loss": 3.863, "norm_diff": 0.0808, "norm_loss": 0.0, "num_token_doc": 66.6177, "num_token_overlap": 11.6423, "num_token_query": 31.3129, "num_token_union": 64.9768, "num_word_context": 202.3773, "num_word_doc": 49.7378, "num_word_query": 23.2526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4092.5558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1925, "query_norm": 1.2922, "queue_k_norm": 1.3788, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3129, "sent_len_1": 66.6177, "sent_len_max_0": 127.4775, "sent_len_max_1": 190.9087, "stdk": 0.047, "stdq": 0.0423, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8963, "doc_norm": 1.3768, "encoder_q-embeddings": 2282.2361, "encoder_q-layer.0": 1525.9324, "encoder_q-layer.1": 1630.9631, "encoder_q-layer.10": 2383.7136, "encoder_q-layer.11": 5842.5322, "encoder_q-layer.2": 1887.6171, "encoder_q-layer.3": 1965.6317, "encoder_q-layer.4": 2159.6533, "encoder_q-layer.5": 2143.3416, "encoder_q-layer.6": 2195.22, "encoder_q-layer.7": 2293.8164, "encoder_q-layer.8": 2624.2019, "encoder_q-layer.9": 2293.5977, "epoch": 0.32, "inbatch_neg_score": 0.1888, "inbatch_pos_score": 0.7363, "learning_rate": 2.8000000000000003e-05, "loss": 3.8963, "norm_diff": 0.082, "norm_loss": 0.0, "num_token_doc": 66.8746, "num_token_overlap": 11.6181, "num_token_query": 31.2782, "num_token_union": 65.1901, "num_word_context": 202.3091, "num_word_doc": 49.9197, "num_word_query": 23.243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4019.8957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1896, "query_norm": 1.2948, "queue_k_norm": 1.3804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2782, "sent_len_1": 66.8746, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.865, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.8825, "doc_norm": 1.3812, "encoder_q-embeddings": 4059.4241, "encoder_q-layer.0": 2781.4092, "encoder_q-layer.1": 3000.7854, "encoder_q-layer.10": 2567.8425, "encoder_q-layer.11": 5798.8472, "encoder_q-layer.2": 3169.3884, "encoder_q-layer.3": 3227.4277, "encoder_q-layer.4": 3141.6011, "encoder_q-layer.5": 3167.646, "encoder_q-layer.6": 2818.113, "encoder_q-layer.7": 2751.2456, "encoder_q-layer.8": 2670.5081, "encoder_q-layer.9": 2360.739, "epoch": 0.32, "inbatch_neg_score": 0.1929, "inbatch_pos_score": 0.7729, "learning_rate": 2.7944444444444445e-05, "loss": 3.8825, "norm_diff": 0.0843, "norm_loss": 0.0, "num_token_doc": 66.7346, "num_token_overlap": 11.6917, "num_token_query": 31.347, "num_token_union": 65.0507, "num_word_context": 201.9643, "num_word_doc": 49.7695, "num_word_query": 23.2821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5038.3785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1943, "query_norm": 1.2969, "queue_k_norm": 1.3795, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.347, "sent_len_1": 66.7346, "sent_len_max_0": 127.535, "sent_len_max_1": 190.9675, "stdk": 0.0473, "stdq": 0.0424, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8905, "doc_norm": 1.3792, "encoder_q-embeddings": 2072.1179, "encoder_q-layer.0": 1313.9539, "encoder_q-layer.1": 1356.5336, "encoder_q-layer.10": 2400.3718, "encoder_q-layer.11": 5908.415, "encoder_q-layer.2": 1543.326, "encoder_q-layer.3": 1543.2438, "encoder_q-layer.4": 1561.4081, "encoder_q-layer.5": 1609.2306, "encoder_q-layer.6": 1808.9781, "encoder_q-layer.7": 2177.292, "encoder_q-layer.8": 2495.9385, "encoder_q-layer.9": 2273.9287, "epoch": 0.32, "inbatch_neg_score": 0.196, "inbatch_pos_score": 0.7607, "learning_rate": 2.788888888888889e-05, "loss": 3.8905, "norm_diff": 0.0918, "norm_loss": 0.0, "num_token_doc": 66.6949, "num_token_overlap": 11.7013, "num_token_query": 31.436, "num_token_union": 65.0837, "num_word_context": 202.2635, "num_word_doc": 49.7671, "num_word_query": 23.3628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3708.8524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1962, "query_norm": 1.2874, "queue_k_norm": 1.38, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.436, "sent_len_1": 66.6949, "sent_len_max_0": 127.4975, "sent_len_max_1": 189.7287, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.8851, "doc_norm": 1.3738, "encoder_q-embeddings": 2730.3889, "encoder_q-layer.0": 1797.5819, "encoder_q-layer.1": 1867.3536, "encoder_q-layer.10": 2733.7139, "encoder_q-layer.11": 6349.7227, "encoder_q-layer.2": 2120.7515, "encoder_q-layer.3": 2174.6174, "encoder_q-layer.4": 2195.8357, "encoder_q-layer.5": 2077.7383, "encoder_q-layer.6": 2234.502, "encoder_q-layer.7": 2425.6204, "encoder_q-layer.8": 2760.8005, "encoder_q-layer.9": 2577.8457, "epoch": 0.32, "inbatch_neg_score": 0.1968, "inbatch_pos_score": 0.7422, "learning_rate": 2.7833333333333333e-05, "loss": 3.8851, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 66.8657, "num_token_overlap": 11.6492, "num_token_query": 31.2875, "num_token_union": 65.1544, "num_word_context": 202.174, "num_word_doc": 49.9193, "num_word_query": 23.2366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4353.5308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.197, "query_norm": 1.2967, "queue_k_norm": 1.3798, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2875, "sent_len_1": 66.8657, "sent_len_max_0": 127.1587, "sent_len_max_1": 186.6387, "stdk": 0.047, "stdq": 0.0426, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8695, "doc_norm": 1.385, "encoder_q-embeddings": 3298.375, "encoder_q-layer.0": 2269.7529, "encoder_q-layer.1": 2468.2585, "encoder_q-layer.10": 2655.2656, "encoder_q-layer.11": 6193.1035, "encoder_q-layer.2": 2458.7246, "encoder_q-layer.3": 2482.6553, "encoder_q-layer.4": 2541.6328, "encoder_q-layer.5": 2588.396, "encoder_q-layer.6": 2703.9597, "encoder_q-layer.7": 2757.9648, "encoder_q-layer.8": 2984.5273, "encoder_q-layer.9": 2516.1523, "epoch": 0.33, "inbatch_neg_score": 0.1964, "inbatch_pos_score": 0.7612, "learning_rate": 2.777777777777778e-05, "loss": 3.8695, "norm_diff": 0.0645, "norm_loss": 0.0, "num_token_doc": 66.9579, "num_token_overlap": 11.6529, "num_token_query": 31.345, "num_token_union": 65.1919, "num_word_context": 202.3459, "num_word_doc": 49.9264, "num_word_query": 23.2951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4663.9513, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.196, "query_norm": 1.3205, "queue_k_norm": 1.3794, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.345, "sent_len_1": 66.9579, "sent_len_max_0": 127.3675, "sent_len_max_1": 189.0437, "stdk": 0.0474, "stdq": 0.0435, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 29.3657, "dev_samples_per_second": 2.179, "dev_steps_per_second": 0.034, "epoch": 0.33, "step": 50000, "test_accuracy": 92.431640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4339442551136017, "test_doc_norm": 1.344964623451233, "test_inbatch_neg_score": 0.5194107294082642, "test_inbatch_pos_score": 1.4018628597259521, "test_loss": 0.4339442551136017, "test_loss_align": 1.0430909395217896, "test_loss_unif": 3.916745185852051, "test_loss_unif_q@queue": 3.9167449474334717, "test_norm_diff": 0.06980408728122711, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1797257363796234, "test_query_norm": 1.4147686958312988, "test_queue_k_norm": 1.379556655883789, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040947720408439636, "test_stdq": 0.04189923778176308, "test_stdqueue_k": 0.047364167869091034, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.3657, "dev_samples_per_second": 2.179, "dev_steps_per_second": 0.034, "epoch": 0.33, "eval_beir-arguana_ndcg@10": 0.35176, "eval_beir-arguana_recall@10": 0.60313, "eval_beir-arguana_recall@100": 0.90967, "eval_beir-arguana_recall@20": 0.72973, "eval_beir-avg_ndcg@10": 0.3670544166666666, "eval_beir-avg_recall@10": 0.43979375000000004, "eval_beir-avg_recall@100": 0.6179381666666668, "eval_beir-avg_recall@20": 0.4971535833333333, "eval_beir-cqadupstack_ndcg@10": 0.25050416666666664, "eval_beir-cqadupstack_recall@10": 0.34187750000000006, "eval_beir-cqadupstack_recall@100": 0.5721116666666667, "eval_beir-cqadupstack_recall@20": 0.40751583333333335, "eval_beir-fiqa_ndcg@10": 0.23405, "eval_beir-fiqa_recall@10": 0.29222, "eval_beir-fiqa_recall@100": 0.56039, "eval_beir-fiqa_recall@20": 0.37148, "eval_beir-nfcorpus_ndcg@10": 0.29692, "eval_beir-nfcorpus_recall@10": 0.14435, "eval_beir-nfcorpus_recall@100": 0.27178, "eval_beir-nfcorpus_recall@20": 0.17417, "eval_beir-nq_ndcg@10": 0.27192, "eval_beir-nq_recall@10": 0.44508, "eval_beir-nq_recall@100": 0.78394, "eval_beir-nq_recall@20": 0.55497, "eval_beir-quora_ndcg@10": 0.75343, "eval_beir-quora_recall@10": 0.86328, "eval_beir-quora_recall@100": 0.97266, "eval_beir-quora_recall@20": 0.91157, "eval_beir-scidocs_ndcg@10": 0.14625, "eval_beir-scidocs_recall@10": 0.15332, "eval_beir-scidocs_recall@100": 0.34875, "eval_beir-scidocs_recall@20": 0.20712, "eval_beir-scifact_ndcg@10": 0.61574, "eval_beir-scifact_recall@10": 0.80056, "eval_beir-scifact_recall@100": 0.89489, "eval_beir-scifact_recall@20": 0.83833, "eval_beir-trec-covid_ndcg@10": 0.55215, "eval_beir-trec-covid_recall@10": 0.62, "eval_beir-trec-covid_recall@100": 0.43, "eval_beir-trec-covid_recall@20": 0.559, "eval_beir-webis-touche2020_ndcg@10": 0.19782, "eval_beir-webis-touche2020_recall@10": 0.13412, "eval_beir-webis-touche2020_recall@100": 0.43519, "eval_beir-webis-touche2020_recall@20": 0.21765, "eval_senteval-avg_sts": 0.7520131146739149, "eval_senteval-sickr_spearman": 0.7118272634182193, "eval_senteval-stsb_spearman": 0.7921989659296104, "step": 50000, "test_accuracy": 92.431640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4339442551136017, "test_doc_norm": 1.344964623451233, "test_inbatch_neg_score": 0.5194107294082642, "test_inbatch_pos_score": 1.4018628597259521, "test_loss": 0.4339442551136017, "test_loss_align": 1.0430909395217896, "test_loss_unif": 3.916745185852051, "test_loss_unif_q@queue": 3.9167449474334717, "test_norm_diff": 0.06980408728122711, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1797257363796234, "test_query_norm": 1.4147686958312988, "test_queue_k_norm": 1.379556655883789, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040947720408439636, "test_stdq": 0.04189923778176308, "test_stdqueue_k": 0.047364167869091034, "test_stdqueue_q": 0.0 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.8811, "doc_norm": 1.3811, "encoder_q-embeddings": 2226.6299, "encoder_q-layer.0": 1498.1362, "encoder_q-layer.1": 1529.0079, "encoder_q-layer.10": 2348.7463, "encoder_q-layer.11": 5920.0879, "encoder_q-layer.2": 1703.2255, "encoder_q-layer.3": 1718.9235, "encoder_q-layer.4": 1748.647, "encoder_q-layer.5": 1738.0765, "encoder_q-layer.6": 1936.0978, "encoder_q-layer.7": 2114.3293, "encoder_q-layer.8": 2408.2773, "encoder_q-layer.9": 2264.397, "epoch": 0.33, "inbatch_neg_score": 0.2, "inbatch_pos_score": 0.7358, "learning_rate": 2.772222222222222e-05, "loss": 3.8811, "norm_diff": 0.1063, "norm_loss": 0.0, "num_token_doc": 66.7825, "num_token_overlap": 11.6778, "num_token_query": 31.3152, "num_token_union": 65.0815, "num_word_context": 202.2202, "num_word_doc": 49.8304, "num_word_query": 23.2352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3825.5863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2, "query_norm": 1.2748, "queue_k_norm": 1.3813, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3152, "sent_len_1": 66.7825, "sent_len_max_0": 127.56, "sent_len_max_1": 189.1012, "stdk": 0.0473, "stdq": 0.0418, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8629, "doc_norm": 1.3855, "encoder_q-embeddings": 3365.562, "encoder_q-layer.0": 2301.1785, "encoder_q-layer.1": 2569.9456, "encoder_q-layer.10": 2497.2698, "encoder_q-layer.11": 6033.5107, "encoder_q-layer.2": 2766.4316, "encoder_q-layer.3": 2731.344, "encoder_q-layer.4": 2705.0696, "encoder_q-layer.5": 2442.3325, "encoder_q-layer.6": 2515.9165, "encoder_q-layer.7": 2333.2412, "encoder_q-layer.8": 2401.6606, "encoder_q-layer.9": 2207.7397, "epoch": 0.33, "inbatch_neg_score": 0.1974, "inbatch_pos_score": 0.7397, "learning_rate": 2.7666666666666667e-05, "loss": 3.8629, "norm_diff": 0.1129, "norm_loss": 0.0, "num_token_doc": 66.8434, "num_token_overlap": 11.773, "num_token_query": 31.5066, "num_token_union": 65.1189, "num_word_context": 202.3166, "num_word_doc": 49.8684, "num_word_query": 23.3981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4618.3283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.197, "query_norm": 1.2726, "queue_k_norm": 1.3803, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5066, "sent_len_1": 66.8434, "sent_len_max_0": 127.5512, "sent_len_max_1": 190.0712, "stdk": 0.0475, "stdq": 0.0419, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8962, "doc_norm": 1.3781, "encoder_q-embeddings": 3365.4973, "encoder_q-layer.0": 2366.8665, "encoder_q-layer.1": 2619.8049, "encoder_q-layer.10": 2858.2107, "encoder_q-layer.11": 6409.6318, "encoder_q-layer.2": 3151.384, "encoder_q-layer.3": 3164.9126, "encoder_q-layer.4": 3417.7036, "encoder_q-layer.5": 3283.1831, "encoder_q-layer.6": 3349.5383, "encoder_q-layer.7": 3347.3479, "encoder_q-layer.8": 3281.6572, "encoder_q-layer.9": 2762.1233, "epoch": 0.33, "inbatch_neg_score": 0.193, "inbatch_pos_score": 0.7539, "learning_rate": 2.761111111111111e-05, "loss": 3.8962, "norm_diff": 0.0786, "norm_loss": 0.0, "num_token_doc": 66.8285, "num_token_overlap": 11.6606, "num_token_query": 31.4056, "num_token_union": 65.2257, "num_word_context": 202.5689, "num_word_doc": 49.8961, "num_word_query": 23.3409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5211.3333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1934, "query_norm": 1.2995, "queue_k_norm": 1.3801, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4056, "sent_len_1": 66.8285, "sent_len_max_0": 127.4912, "sent_len_max_1": 189.77, "stdk": 0.0472, "stdq": 0.0431, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.8954, "doc_norm": 1.3773, "encoder_q-embeddings": 3387.1072, "encoder_q-layer.0": 2300.9851, "encoder_q-layer.1": 2609.3486, "encoder_q-layer.10": 2484.1223, "encoder_q-layer.11": 6661.5483, "encoder_q-layer.2": 2910.7087, "encoder_q-layer.3": 2837.9438, "encoder_q-layer.4": 2990.4697, "encoder_q-layer.5": 2953.7761, "encoder_q-layer.6": 2973.5835, "encoder_q-layer.7": 2863.3748, "encoder_q-layer.8": 2770.8552, "encoder_q-layer.9": 2333.126, "epoch": 0.33, "inbatch_neg_score": 0.1931, "inbatch_pos_score": 0.7598, "learning_rate": 2.7555555555555555e-05, "loss": 3.8954, "norm_diff": 0.0939, "norm_loss": 0.0, "num_token_doc": 66.7017, "num_token_overlap": 11.6728, "num_token_query": 31.3718, "num_token_union": 65.087, "num_word_context": 202.3829, "num_word_doc": 49.7546, "num_word_query": 23.2827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4975.7627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1934, "query_norm": 1.2834, "queue_k_norm": 1.3802, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3718, "sent_len_1": 66.7017, "sent_len_max_0": 127.4513, "sent_len_max_1": 188.8587, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.8923, "doc_norm": 1.3815, "encoder_q-embeddings": 5526.3696, "encoder_q-layer.0": 3800.3831, "encoder_q-layer.1": 3879.2673, "encoder_q-layer.10": 2258.9104, "encoder_q-layer.11": 5925.1821, "encoder_q-layer.2": 3724.6016, "encoder_q-layer.3": 3935.7097, "encoder_q-layer.4": 3700.1509, "encoder_q-layer.5": 3549.4224, "encoder_q-layer.6": 3524.8943, "encoder_q-layer.7": 2921.7058, "encoder_q-layer.8": 2817.9685, "encoder_q-layer.9": 2339.1592, "epoch": 0.33, "inbatch_neg_score": 0.194, "inbatch_pos_score": 0.7358, "learning_rate": 2.7500000000000004e-05, "loss": 3.8923, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.8915, "num_token_overlap": 11.6796, "num_token_query": 31.3214, "num_token_union": 65.1759, "num_word_context": 202.8314, "num_word_doc": 49.9337, "num_word_query": 23.2637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6038.8669, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1943, "query_norm": 1.2775, "queue_k_norm": 1.3796, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3214, "sent_len_1": 66.8915, "sent_len_max_0": 127.4287, "sent_len_max_1": 187.235, "stdk": 0.0474, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8893, "doc_norm": 1.3756, "encoder_q-embeddings": 4619.5513, "encoder_q-layer.0": 3405.7529, "encoder_q-layer.1": 3807.9368, "encoder_q-layer.10": 2501.5117, "encoder_q-layer.11": 6423.4683, "encoder_q-layer.2": 4365.2422, "encoder_q-layer.3": 4406.3496, "encoder_q-layer.4": 4687.936, "encoder_q-layer.5": 4635.7529, "encoder_q-layer.6": 4524.3618, "encoder_q-layer.7": 4246.2319, "encoder_q-layer.8": 3436.3384, "encoder_q-layer.9": 2638.5105, "epoch": 0.33, "inbatch_neg_score": 0.1969, "inbatch_pos_score": 0.7471, "learning_rate": 2.7444444444444443e-05, "loss": 3.8893, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.8694, "num_token_overlap": 11.6115, "num_token_query": 31.2361, "num_token_union": 65.1419, "num_word_context": 202.3826, "num_word_doc": 49.8843, "num_word_query": 23.2082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6454.648, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1968, "query_norm": 1.2784, "queue_k_norm": 1.3798, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2361, "sent_len_1": 66.8694, "sent_len_max_0": 127.515, "sent_len_max_1": 190.8725, "stdk": 0.0471, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.882, "doc_norm": 1.3788, "encoder_q-embeddings": 5106.5767, "encoder_q-layer.0": 4120.2188, "encoder_q-layer.1": 3650.1245, "encoder_q-layer.10": 2728.8384, "encoder_q-layer.11": 6557.6431, "encoder_q-layer.2": 4042.6575, "encoder_q-layer.3": 3985.6753, "encoder_q-layer.4": 4390.3486, "encoder_q-layer.5": 4574.1338, "encoder_q-layer.6": 4015.4424, "encoder_q-layer.7": 3661.6533, "encoder_q-layer.8": 3228.5073, "encoder_q-layer.9": 2617.8657, "epoch": 0.33, "inbatch_neg_score": 0.199, "inbatch_pos_score": 0.7349, "learning_rate": 2.7388888888888892e-05, "loss": 3.882, "norm_diff": 0.1092, "norm_loss": 0.0, "num_token_doc": 66.5691, "num_token_overlap": 11.6607, "num_token_query": 31.3849, "num_token_union": 64.9757, "num_word_context": 201.8817, "num_word_doc": 49.6667, "num_word_query": 23.3231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6230.6017, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1982, "query_norm": 1.2696, "queue_k_norm": 1.3799, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3849, "sent_len_1": 66.5691, "sent_len_max_0": 127.4625, "sent_len_max_1": 190.105, "stdk": 0.0472, "stdq": 0.0418, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8992, "doc_norm": 1.3794, "encoder_q-embeddings": 4470.2295, "encoder_q-layer.0": 3106.2922, "encoder_q-layer.1": 3356.054, "encoder_q-layer.10": 2806.7681, "encoder_q-layer.11": 6327.7876, "encoder_q-layer.2": 3631.9746, "encoder_q-layer.3": 4100.5225, "encoder_q-layer.4": 4191.3438, "encoder_q-layer.5": 3934.1184, "encoder_q-layer.6": 4443.1733, "encoder_q-layer.7": 4156.3389, "encoder_q-layer.8": 3509.46, "encoder_q-layer.9": 2672.3123, "epoch": 0.33, "inbatch_neg_score": 0.1937, "inbatch_pos_score": 0.7607, "learning_rate": 2.733333333333333e-05, "loss": 3.8992, "norm_diff": 0.0766, "norm_loss": 0.0, "num_token_doc": 66.8143, "num_token_overlap": 11.5687, "num_token_query": 31.1382, "num_token_union": 65.0733, "num_word_context": 202.4075, "num_word_doc": 49.821, "num_word_query": 23.1087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6033.4259, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1938, "query_norm": 1.3028, "queue_k_norm": 1.3813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1382, "sent_len_1": 66.8143, "sent_len_max_0": 127.4175, "sent_len_max_1": 190.6987, "stdk": 0.0472, "stdq": 0.0432, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8773, "doc_norm": 1.3744, "encoder_q-embeddings": 2994.3328, "encoder_q-layer.0": 2085.0256, "encoder_q-layer.1": 2380.7122, "encoder_q-layer.10": 2425.5647, "encoder_q-layer.11": 6245.478, "encoder_q-layer.2": 2665.8584, "encoder_q-layer.3": 2856.2388, "encoder_q-layer.4": 2887.655, "encoder_q-layer.5": 2787.3289, "encoder_q-layer.6": 2597.8699, "encoder_q-layer.7": 2471.1064, "encoder_q-layer.8": 2612.9939, "encoder_q-layer.9": 2267.8254, "epoch": 0.33, "inbatch_neg_score": 0.1991, "inbatch_pos_score": 0.748, "learning_rate": 2.727777777777778e-05, "loss": 3.8773, "norm_diff": 0.0929, "norm_loss": 0.0, "num_token_doc": 66.8297, "num_token_overlap": 11.6375, "num_token_query": 31.2951, "num_token_union": 65.1354, "num_word_context": 202.4861, "num_word_doc": 49.8677, "num_word_query": 23.2399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4688.2765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1998, "query_norm": 1.2815, "queue_k_norm": 1.3802, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2951, "sent_len_1": 66.8297, "sent_len_max_0": 127.5113, "sent_len_max_1": 189.1425, "stdk": 0.047, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8643, "doc_norm": 1.3861, "encoder_q-embeddings": 2829.031, "encoder_q-layer.0": 1848.0435, "encoder_q-layer.1": 1982.4996, "encoder_q-layer.10": 2463.7986, "encoder_q-layer.11": 6168.1611, "encoder_q-layer.2": 2377.8062, "encoder_q-layer.3": 2439.4353, "encoder_q-layer.4": 2721.769, "encoder_q-layer.5": 2641.1128, "encoder_q-layer.6": 2710.8884, "encoder_q-layer.7": 2821.645, "encoder_q-layer.8": 2891.5007, "encoder_q-layer.9": 2484.0369, "epoch": 0.33, "inbatch_neg_score": 0.2023, "inbatch_pos_score": 0.7505, "learning_rate": 2.7222222222222223e-05, "loss": 3.8643, "norm_diff": 0.0974, "norm_loss": 0.0, "num_token_doc": 66.9739, "num_token_overlap": 11.763, "num_token_query": 31.564, "num_token_union": 65.2774, "num_word_context": 202.4591, "num_word_doc": 49.9763, "num_word_query": 23.4339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4536.4648, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2021, "query_norm": 1.2888, "queue_k_norm": 1.382, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.564, "sent_len_1": 66.9739, "sent_len_max_0": 127.6762, "sent_len_max_1": 189.315, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8614, "doc_norm": 1.3877, "encoder_q-embeddings": 2471.1531, "encoder_q-layer.0": 1625.1107, "encoder_q-layer.1": 1696.3625, "encoder_q-layer.10": 2484.7124, "encoder_q-layer.11": 6369.3423, "encoder_q-layer.2": 1837.5231, "encoder_q-layer.3": 1897.7341, "encoder_q-layer.4": 2068.3418, "encoder_q-layer.5": 2085.314, "encoder_q-layer.6": 2299.1084, "encoder_q-layer.7": 2517.804, "encoder_q-layer.8": 2862.6167, "encoder_q-layer.9": 2504.8965, "epoch": 0.33, "inbatch_neg_score": 0.1985, "inbatch_pos_score": 0.749, "learning_rate": 2.716666666666667e-05, "loss": 3.8614, "norm_diff": 0.0827, "norm_loss": 0.0, "num_token_doc": 66.874, "num_token_overlap": 11.7303, "num_token_query": 31.4862, "num_token_union": 65.1519, "num_word_context": 202.2233, "num_word_doc": 49.8683, "num_word_query": 23.3866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4294.089, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.305, "queue_k_norm": 1.3801, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4862, "sent_len_1": 66.874, "sent_len_max_0": 127.4575, "sent_len_max_1": 191.9913, "stdk": 0.0475, "stdq": 0.0431, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8688, "doc_norm": 1.3853, "encoder_q-embeddings": 2706.2893, "encoder_q-layer.0": 1866.2478, "encoder_q-layer.1": 2000.9967, "encoder_q-layer.10": 2467.5315, "encoder_q-layer.11": 6268.9824, "encoder_q-layer.2": 2427.6113, "encoder_q-layer.3": 2445.2544, "encoder_q-layer.4": 2549.7744, "encoder_q-layer.5": 2609.9158, "encoder_q-layer.6": 2792.9912, "encoder_q-layer.7": 2541.3281, "encoder_q-layer.8": 2630.5662, "encoder_q-layer.9": 2308.6072, "epoch": 0.33, "inbatch_neg_score": 0.2018, "inbatch_pos_score": 0.7666, "learning_rate": 2.7111111111111114e-05, "loss": 3.8688, "norm_diff": 0.093, "norm_loss": 0.0, "num_token_doc": 67.0294, "num_token_overlap": 11.7059, "num_token_query": 31.3801, "num_token_union": 65.2443, "num_word_context": 202.4654, "num_word_doc": 49.9967, "num_word_query": 23.3106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4485.1771, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2029, "query_norm": 1.2924, "queue_k_norm": 1.3818, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3801, "sent_len_1": 67.0294, "sent_len_max_0": 127.6075, "sent_len_max_1": 190.76, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.8748, "doc_norm": 1.3803, "encoder_q-embeddings": 3917.1218, "encoder_q-layer.0": 2739.5979, "encoder_q-layer.1": 2902.6562, "encoder_q-layer.10": 2420.6689, "encoder_q-layer.11": 6114.6377, "encoder_q-layer.2": 3160.9536, "encoder_q-layer.3": 3120.6208, "encoder_q-layer.4": 3147.5828, "encoder_q-layer.5": 2873.9639, "encoder_q-layer.6": 2695.8677, "encoder_q-layer.7": 2636.4233, "encoder_q-layer.8": 2593.1501, "encoder_q-layer.9": 2318.4143, "epoch": 0.33, "inbatch_neg_score": 0.2029, "inbatch_pos_score": 0.75, "learning_rate": 2.7055555555555557e-05, "loss": 3.8748, "norm_diff": 0.0888, "norm_loss": 0.0, "num_token_doc": 66.7609, "num_token_overlap": 11.6266, "num_token_query": 31.3004, "num_token_union": 65.1289, "num_word_context": 202.4969, "num_word_doc": 49.8071, "num_word_query": 23.2346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5039.3002, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2029, "query_norm": 1.2916, "queue_k_norm": 1.3817, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3004, "sent_len_1": 66.7609, "sent_len_max_0": 127.48, "sent_len_max_1": 187.8625, "stdk": 0.0472, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8851, "doc_norm": 1.3901, "encoder_q-embeddings": 5237.6929, "encoder_q-layer.0": 3505.5291, "encoder_q-layer.1": 3744.2661, "encoder_q-layer.10": 4894.8037, "encoder_q-layer.11": 12239.5225, "encoder_q-layer.2": 4378.7397, "encoder_q-layer.3": 4544.981, "encoder_q-layer.4": 4692.4395, "encoder_q-layer.5": 4043.7729, "encoder_q-layer.6": 4344.8423, "encoder_q-layer.7": 4472.5186, "encoder_q-layer.8": 5010.8994, "encoder_q-layer.9": 4662.0938, "epoch": 0.33, "inbatch_neg_score": 0.2022, "inbatch_pos_score": 0.7456, "learning_rate": 2.7000000000000002e-05, "loss": 3.8851, "norm_diff": 0.0892, "norm_loss": 0.0, "num_token_doc": 66.7998, "num_token_overlap": 11.6736, "num_token_query": 31.385, "num_token_union": 65.1355, "num_word_context": 202.0677, "num_word_doc": 49.8358, "num_word_query": 23.3024, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8446.5817, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2024, "query_norm": 1.3009, "queue_k_norm": 1.3813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.385, "sent_len_1": 66.7998, "sent_len_max_0": 127.4813, "sent_len_max_1": 189.975, "stdk": 0.0476, "stdq": 0.043, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8694, "doc_norm": 1.3879, "encoder_q-embeddings": 5329.2661, "encoder_q-layer.0": 3676.4443, "encoder_q-layer.1": 3960.5569, "encoder_q-layer.10": 5014.5215, "encoder_q-layer.11": 12641.1143, "encoder_q-layer.2": 4422.8354, "encoder_q-layer.3": 4677.5093, "encoder_q-layer.4": 4825.6304, "encoder_q-layer.5": 4741.6274, "encoder_q-layer.6": 4658.0215, "encoder_q-layer.7": 4651.6553, "encoder_q-layer.8": 5459.9609, "encoder_q-layer.9": 4779.6357, "epoch": 0.34, "inbatch_neg_score": 0.201, "inbatch_pos_score": 0.751, "learning_rate": 2.6944444444444445e-05, "loss": 3.8694, "norm_diff": 0.1215, "norm_loss": 0.0, "num_token_doc": 66.9525, "num_token_overlap": 11.6851, "num_token_query": 31.4581, "num_token_union": 65.2366, "num_word_context": 202.5639, "num_word_doc": 49.9202, "num_word_query": 23.3524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8722.2344, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2009, "query_norm": 1.2664, "queue_k_norm": 1.3822, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4581, "sent_len_1": 66.9525, "sent_len_max_0": 127.71, "sent_len_max_1": 191.625, "stdk": 0.0475, "stdq": 0.0417, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8779, "doc_norm": 1.3837, "encoder_q-embeddings": 4370.6211, "encoder_q-layer.0": 2899.3828, "encoder_q-layer.1": 3029.1736, "encoder_q-layer.10": 4682.2769, "encoder_q-layer.11": 12398.2363, "encoder_q-layer.2": 3543.6501, "encoder_q-layer.3": 3589.8523, "encoder_q-layer.4": 3883.2075, "encoder_q-layer.5": 3832.6792, "encoder_q-layer.6": 4011.5627, "encoder_q-layer.7": 4432.7705, "encoder_q-layer.8": 5147.1592, "encoder_q-layer.9": 4618.1108, "epoch": 0.34, "inbatch_neg_score": 0.2047, "inbatch_pos_score": 0.7588, "learning_rate": 2.688888888888889e-05, "loss": 3.8779, "norm_diff": 0.1025, "norm_loss": 0.0, "num_token_doc": 66.7775, "num_token_overlap": 11.6768, "num_token_query": 31.3722, "num_token_union": 65.1451, "num_word_context": 202.6096, "num_word_doc": 49.8424, "num_word_query": 23.297, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8120.5058, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2037, "query_norm": 1.2812, "queue_k_norm": 1.3825, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3722, "sent_len_1": 66.7775, "sent_len_max_0": 127.4625, "sent_len_max_1": 188.4013, "stdk": 0.0473, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.869, "doc_norm": 1.3795, "encoder_q-embeddings": 5149.2578, "encoder_q-layer.0": 3486.2937, "encoder_q-layer.1": 3738.6714, "encoder_q-layer.10": 4585.3926, "encoder_q-layer.11": 11740.4814, "encoder_q-layer.2": 4181.8003, "encoder_q-layer.3": 4479.1812, "encoder_q-layer.4": 4645.4609, "encoder_q-layer.5": 4317.1797, "encoder_q-layer.6": 4745.6826, "encoder_q-layer.7": 5064.3472, "encoder_q-layer.8": 5343.1309, "encoder_q-layer.9": 4416.6431, "epoch": 0.34, "inbatch_neg_score": 0.2055, "inbatch_pos_score": 0.752, "learning_rate": 2.6833333333333333e-05, "loss": 3.869, "norm_diff": 0.0949, "norm_loss": 0.0, "num_token_doc": 66.7028, "num_token_overlap": 11.6763, "num_token_query": 31.3645, "num_token_union": 65.1462, "num_word_context": 202.3094, "num_word_doc": 49.8256, "num_word_query": 23.3161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8360.186, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2058, "query_norm": 1.2846, "queue_k_norm": 1.383, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3645, "sent_len_1": 66.7028, "sent_len_max_0": 127.46, "sent_len_max_1": 188.4038, "stdk": 0.0472, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8735, "doc_norm": 1.3829, "encoder_q-embeddings": 4858.4321, "encoder_q-layer.0": 3275.9487, "encoder_q-layer.1": 3452.7214, "encoder_q-layer.10": 4879.4678, "encoder_q-layer.11": 12059.2939, "encoder_q-layer.2": 3977.4031, "encoder_q-layer.3": 4092.8848, "encoder_q-layer.4": 4649.9531, "encoder_q-layer.5": 4563.731, "encoder_q-layer.6": 4939.7305, "encoder_q-layer.7": 4942.0669, "encoder_q-layer.8": 5189.0801, "encoder_q-layer.9": 4771.9375, "epoch": 0.34, "inbatch_neg_score": 0.2072, "inbatch_pos_score": 0.772, "learning_rate": 2.677777777777778e-05, "loss": 3.8735, "norm_diff": 0.0927, "norm_loss": 0.0, "num_token_doc": 66.6066, "num_token_overlap": 11.664, "num_token_query": 31.3853, "num_token_union": 65.0304, "num_word_context": 202.1304, "num_word_doc": 49.6837, "num_word_query": 23.3005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8359.6572, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2081, "query_norm": 1.2903, "queue_k_norm": 1.3819, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3853, "sent_len_1": 66.6066, "sent_len_max_0": 127.625, "sent_len_max_1": 189.5762, "stdk": 0.0473, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8597, "doc_norm": 1.3859, "encoder_q-embeddings": 41793.4141, "encoder_q-layer.0": 29134.4648, "encoder_q-layer.1": 29596.5254, "encoder_q-layer.10": 5540.2095, "encoder_q-layer.11": 13357.1045, "encoder_q-layer.2": 31164.9219, "encoder_q-layer.3": 34296.9648, "encoder_q-layer.4": 33967.0586, "encoder_q-layer.5": 32534.8828, "encoder_q-layer.6": 35448.8984, "encoder_q-layer.7": 29239.2695, "encoder_q-layer.8": 20613.5742, "encoder_q-layer.9": 7679.8721, "epoch": 0.34, "inbatch_neg_score": 0.2098, "inbatch_pos_score": 0.7661, "learning_rate": 2.6722222222222228e-05, "loss": 3.8597, "norm_diff": 0.1063, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 11.6713, "num_token_query": 31.4789, "num_token_union": 65.196, "num_word_context": 202.5911, "num_word_doc": 49.8311, "num_word_query": 23.3874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44179.7014, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.2087, "query_norm": 1.2797, "queue_k_norm": 1.3821, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4789, "sent_len_1": 66.7805, "sent_len_max_0": 127.4963, "sent_len_max_1": 189.575, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.867, "doc_norm": 1.3792, "encoder_q-embeddings": 4903.1558, "encoder_q-layer.0": 3242.1514, "encoder_q-layer.1": 3672.3938, "encoder_q-layer.10": 4718.8193, "encoder_q-layer.11": 11944.6396, "encoder_q-layer.2": 4136.9253, "encoder_q-layer.3": 4158.5972, "encoder_q-layer.4": 4352.2793, "encoder_q-layer.5": 4318.0283, "encoder_q-layer.6": 4839.248, "encoder_q-layer.7": 5068.3501, "encoder_q-layer.8": 5365.9155, "encoder_q-layer.9": 4574.9668, "epoch": 0.34, "inbatch_neg_score": 0.2084, "inbatch_pos_score": 0.7612, "learning_rate": 2.6666666666666667e-05, "loss": 3.867, "norm_diff": 0.0844, "norm_loss": 0.0, "num_token_doc": 66.6911, "num_token_overlap": 11.6371, "num_token_query": 31.2866, "num_token_union": 65.0554, "num_word_context": 202.2429, "num_word_doc": 49.7717, "num_word_query": 23.2433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8336.9548, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2083, "query_norm": 1.2948, "queue_k_norm": 1.3821, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2866, "sent_len_1": 66.6911, "sent_len_max_0": 127.625, "sent_len_max_1": 190.13, "stdk": 0.0471, "stdq": 0.0426, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8739, "doc_norm": 1.3843, "encoder_q-embeddings": 5560.145, "encoder_q-layer.0": 3927.3547, "encoder_q-layer.1": 4315.6279, "encoder_q-layer.10": 5070.3203, "encoder_q-layer.11": 12152.9277, "encoder_q-layer.2": 4956.7666, "encoder_q-layer.3": 5447.2007, "encoder_q-layer.4": 5960.2886, "encoder_q-layer.5": 5623.3008, "encoder_q-layer.6": 6368.7573, "encoder_q-layer.7": 6027.7793, "encoder_q-layer.8": 5939.2144, "encoder_q-layer.9": 4819.1494, "epoch": 0.34, "inbatch_neg_score": 0.2104, "inbatch_pos_score": 0.7783, "learning_rate": 2.6611111111111116e-05, "loss": 3.8739, "norm_diff": 0.0782, "norm_loss": 0.0, "num_token_doc": 66.9723, "num_token_overlap": 11.6452, "num_token_query": 31.2931, "num_token_union": 65.2213, "num_word_context": 202.4451, "num_word_doc": 49.9962, "num_word_query": 23.2564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9209.4883, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2111, "query_norm": 1.3061, "queue_k_norm": 1.3824, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2931, "sent_len_1": 66.9723, "sent_len_max_0": 127.2613, "sent_len_max_1": 191.6387, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8527, "doc_norm": 1.3831, "encoder_q-embeddings": 5935.0894, "encoder_q-layer.0": 3718.3611, "encoder_q-layer.1": 4043.7212, "encoder_q-layer.10": 4818.3281, "encoder_q-layer.11": 12406.582, "encoder_q-layer.2": 4738.8696, "encoder_q-layer.3": 5248.6147, "encoder_q-layer.4": 5643.5723, "encoder_q-layer.5": 5872.4165, "encoder_q-layer.6": 6814.127, "encoder_q-layer.7": 6071.0493, "encoder_q-layer.8": 5722.688, "encoder_q-layer.9": 4850.9175, "epoch": 0.34, "inbatch_neg_score": 0.2203, "inbatch_pos_score": 0.7759, "learning_rate": 2.6555555555555555e-05, "loss": 3.8527, "norm_diff": 0.0715, "norm_loss": 0.0, "num_token_doc": 66.7073, "num_token_overlap": 11.6846, "num_token_query": 31.4598, "num_token_union": 65.1412, "num_word_context": 202.6058, "num_word_doc": 49.8014, "num_word_query": 23.3773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9419.8032, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2198, "query_norm": 1.3117, "queue_k_norm": 1.3845, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4598, "sent_len_1": 66.7073, "sent_len_max_0": 127.5763, "sent_len_max_1": 189.755, "stdk": 0.0472, "stdq": 0.0428, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.846, "doc_norm": 1.3765, "encoder_q-embeddings": 4899.7983, "encoder_q-layer.0": 3320.2891, "encoder_q-layer.1": 3444.8677, "encoder_q-layer.10": 4822.228, "encoder_q-layer.11": 11944.3359, "encoder_q-layer.2": 3974.8606, "encoder_q-layer.3": 4110.6553, "encoder_q-layer.4": 4187.3447, "encoder_q-layer.5": 3883.21, "encoder_q-layer.6": 4255.3491, "encoder_q-layer.7": 4648.729, "encoder_q-layer.8": 5316.2695, "encoder_q-layer.9": 4641.8672, "epoch": 0.34, "inbatch_neg_score": 0.2185, "inbatch_pos_score": 0.7622, "learning_rate": 2.6500000000000004e-05, "loss": 3.846, "norm_diff": 0.0733, "norm_loss": 0.0, "num_token_doc": 66.9308, "num_token_overlap": 11.698, "num_token_query": 31.5494, "num_token_union": 65.2902, "num_word_context": 202.4439, "num_word_doc": 49.9655, "num_word_query": 23.4455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8251.9458, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2186, "query_norm": 1.3032, "queue_k_norm": 1.3859, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5494, "sent_len_1": 66.9308, "sent_len_max_0": 127.5925, "sent_len_max_1": 190.4762, "stdk": 0.047, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8699, "doc_norm": 1.3818, "encoder_q-embeddings": 8072.5312, "encoder_q-layer.0": 5429.6938, "encoder_q-layer.1": 6140.7524, "encoder_q-layer.10": 4847.9585, "encoder_q-layer.11": 11805.7031, "encoder_q-layer.2": 6743.2642, "encoder_q-layer.3": 7129.3691, "encoder_q-layer.4": 7069.4463, "encoder_q-layer.5": 7019.7114, "encoder_q-layer.6": 7242.9478, "encoder_q-layer.7": 7681.3516, "encoder_q-layer.8": 6854.8706, "encoder_q-layer.9": 5389.4629, "epoch": 0.34, "inbatch_neg_score": 0.217, "inbatch_pos_score": 0.7725, "learning_rate": 2.6444444444444443e-05, "loss": 3.8699, "norm_diff": 0.071, "norm_loss": 0.0, "num_token_doc": 66.7754, "num_token_overlap": 11.6768, "num_token_query": 31.4319, "num_token_union": 65.146, "num_word_context": 202.5266, "num_word_doc": 49.8138, "num_word_query": 23.3522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11015.1548, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2169, "query_norm": 1.3108, "queue_k_norm": 1.385, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4319, "sent_len_1": 66.7754, "sent_len_max_0": 127.5487, "sent_len_max_1": 190.1312, "stdk": 0.0472, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8728, "doc_norm": 1.3804, "encoder_q-embeddings": 14352.3447, "encoder_q-layer.0": 10396.6348, "encoder_q-layer.1": 11949.8799, "encoder_q-layer.10": 4931.6382, "encoder_q-layer.11": 11982.3154, "encoder_q-layer.2": 13961.6289, "encoder_q-layer.3": 14989.5732, "encoder_q-layer.4": 16068.9424, "encoder_q-layer.5": 14532.4072, "encoder_q-layer.6": 12249.7607, "encoder_q-layer.7": 9739.3818, "encoder_q-layer.8": 6481.0386, "encoder_q-layer.9": 5095.2871, "epoch": 0.34, "inbatch_neg_score": 0.2209, "inbatch_pos_score": 0.7612, "learning_rate": 2.6388888888888892e-05, "loss": 3.8728, "norm_diff": 0.0889, "norm_loss": 0.0, "num_token_doc": 66.8027, "num_token_overlap": 11.6502, "num_token_query": 31.332, "num_token_union": 65.1295, "num_word_context": 202.4124, "num_word_doc": 49.8493, "num_word_query": 23.2716, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17811.3822, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2211, "query_norm": 1.2915, "queue_k_norm": 1.3852, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.332, "sent_len_1": 66.8027, "sent_len_max_0": 127.5938, "sent_len_max_1": 191.02, "stdk": 0.0471, "stdq": 0.0419, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.8489, "doc_norm": 1.3869, "encoder_q-embeddings": 5607.9492, "encoder_q-layer.0": 3700.2961, "encoder_q-layer.1": 3978.3145, "encoder_q-layer.10": 4778.0664, "encoder_q-layer.11": 12231.2139, "encoder_q-layer.2": 4645.126, "encoder_q-layer.3": 5130.4429, "encoder_q-layer.4": 5114.6792, "encoder_q-layer.5": 5063.7373, "encoder_q-layer.6": 5702.0137, "encoder_q-layer.7": 5474.8032, "encoder_q-layer.8": 5564.3003, "encoder_q-layer.9": 4932.5776, "epoch": 0.34, "inbatch_neg_score": 0.2249, "inbatch_pos_score": 0.7769, "learning_rate": 2.633333333333333e-05, "loss": 3.8489, "norm_diff": 0.1023, "norm_loss": 0.0, "num_token_doc": 66.7674, "num_token_overlap": 11.6905, "num_token_query": 31.3905, "num_token_union": 65.0714, "num_word_context": 201.8531, "num_word_doc": 49.8093, "num_word_query": 23.2978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8995.9333, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.226, "query_norm": 1.2845, "queue_k_norm": 1.3862, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3905, "sent_len_1": 66.7674, "sent_len_max_0": 127.525, "sent_len_max_1": 188.9162, "stdk": 0.0473, "stdq": 0.0416, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.863, "doc_norm": 1.3895, "encoder_q-embeddings": 5247.438, "encoder_q-layer.0": 3586.3386, "encoder_q-layer.1": 3801.8828, "encoder_q-layer.10": 4705.0, "encoder_q-layer.11": 11168.1875, "encoder_q-layer.2": 4298.6411, "encoder_q-layer.3": 4562.4653, "encoder_q-layer.4": 4657.0205, "encoder_q-layer.5": 4532.6133, "encoder_q-layer.6": 4788.4058, "encoder_q-layer.7": 5052.7466, "encoder_q-layer.8": 5196.3408, "encoder_q-layer.9": 4443.7876, "epoch": 0.34, "inbatch_neg_score": 0.2304, "inbatch_pos_score": 0.791, "learning_rate": 2.627777777777778e-05, "loss": 3.863, "norm_diff": 0.0762, "norm_loss": 0.0, "num_token_doc": 66.8692, "num_token_overlap": 11.6268, "num_token_query": 31.2631, "num_token_union": 65.15, "num_word_context": 202.5908, "num_word_doc": 49.8889, "num_word_query": 23.218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8267.4825, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.23, "query_norm": 1.3134, "queue_k_norm": 1.3862, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2631, "sent_len_1": 66.8692, "sent_len_max_0": 127.5275, "sent_len_max_1": 188.7025, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8597, "doc_norm": 1.3848, "encoder_q-embeddings": 7921.0474, "encoder_q-layer.0": 5771.96, "encoder_q-layer.1": 6566.0342, "encoder_q-layer.10": 4757.8823, "encoder_q-layer.11": 12439.7598, "encoder_q-layer.2": 7908.1714, "encoder_q-layer.3": 6918.3975, "encoder_q-layer.4": 7769.001, "encoder_q-layer.5": 6054.4336, "encoder_q-layer.6": 6398.5425, "encoder_q-layer.7": 5836.8149, "encoder_q-layer.8": 5742.9819, "encoder_q-layer.9": 4630.8872, "epoch": 0.34, "inbatch_neg_score": 0.2286, "inbatch_pos_score": 0.7563, "learning_rate": 2.6222222222222226e-05, "loss": 3.8597, "norm_diff": 0.0833, "norm_loss": 0.0, "num_token_doc": 66.6647, "num_token_overlap": 11.6365, "num_token_query": 31.2737, "num_token_union": 65.0257, "num_word_context": 202.0767, "num_word_doc": 49.7071, "num_word_query": 23.2077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10997.3657, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2291, "query_norm": 1.3015, "queue_k_norm": 1.3876, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2737, "sent_len_1": 66.6647, "sent_len_max_0": 127.49, "sent_len_max_1": 190.2025, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8599, "doc_norm": 1.3882, "encoder_q-embeddings": 9914.2129, "encoder_q-layer.0": 7592.9951, "encoder_q-layer.1": 8450.5361, "encoder_q-layer.10": 4637.7012, "encoder_q-layer.11": 11574.7314, "encoder_q-layer.2": 7851.8184, "encoder_q-layer.3": 8232.9355, "encoder_q-layer.4": 7688.0576, "encoder_q-layer.5": 6521.7725, "encoder_q-layer.6": 7273.5684, "encoder_q-layer.7": 7199.5684, "encoder_q-layer.8": 5925.3379, "encoder_q-layer.9": 4612.8457, "epoch": 0.34, "inbatch_neg_score": 0.23, "inbatch_pos_score": 0.7935, "learning_rate": 2.6166666666666668e-05, "loss": 3.8599, "norm_diff": 0.0761, "norm_loss": 0.0, "num_token_doc": 66.7894, "num_token_overlap": 11.6881, "num_token_query": 31.403, "num_token_union": 65.1138, "num_word_context": 202.5527, "num_word_doc": 49.8584, "num_word_query": 23.3322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11897.1113, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2314, "query_norm": 1.3121, "queue_k_norm": 1.3896, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.403, "sent_len_1": 66.7894, "sent_len_max_0": 127.3425, "sent_len_max_1": 188.84, "stdk": 0.0473, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8586, "doc_norm": 1.3851, "encoder_q-embeddings": 7983.084, "encoder_q-layer.0": 5772.9819, "encoder_q-layer.1": 6065.2974, "encoder_q-layer.10": 4896.1514, "encoder_q-layer.11": 11961.7275, "encoder_q-layer.2": 7295.4863, "encoder_q-layer.3": 7092.4312, "encoder_q-layer.4": 7514.7939, "encoder_q-layer.5": 7622.1636, "encoder_q-layer.6": 7261.8364, "encoder_q-layer.7": 7479.4233, "encoder_q-layer.8": 6486.71, "encoder_q-layer.9": 4690.6694, "epoch": 0.34, "inbatch_neg_score": 0.2286, "inbatch_pos_score": 0.772, "learning_rate": 2.6111111111111114e-05, "loss": 3.8586, "norm_diff": 0.0801, "norm_loss": 0.0, "num_token_doc": 66.7218, "num_token_overlap": 11.6762, "num_token_query": 31.4565, "num_token_union": 65.1509, "num_word_context": 202.4329, "num_word_doc": 49.7455, "num_word_query": 23.3396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11041.2227, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2289, "query_norm": 1.305, "queue_k_norm": 1.3919, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4565, "sent_len_1": 66.7218, "sent_len_max_0": 127.4338, "sent_len_max_1": 189.3313, "stdk": 0.0472, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.8575, "doc_norm": 1.3917, "encoder_q-embeddings": 4845.6602, "encoder_q-layer.0": 3241.7957, "encoder_q-layer.1": 3474.9426, "encoder_q-layer.10": 5164.4844, "encoder_q-layer.11": 12802.6406, "encoder_q-layer.2": 3882.418, "encoder_q-layer.3": 4038.1899, "encoder_q-layer.4": 4124.3604, "encoder_q-layer.5": 4178.2939, "encoder_q-layer.6": 4543.9287, "encoder_q-layer.7": 4742.3662, "encoder_q-layer.8": 5726.3003, "encoder_q-layer.9": 4990.5381, "epoch": 0.35, "inbatch_neg_score": 0.2324, "inbatch_pos_score": 0.7739, "learning_rate": 2.6055555555555556e-05, "loss": 3.8575, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.8139, "num_token_overlap": 11.6212, "num_token_query": 31.2616, "num_token_union": 65.1087, "num_word_context": 202.345, "num_word_doc": 49.9104, "num_word_query": 23.2248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8506.4529, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2321, "query_norm": 1.3093, "queue_k_norm": 1.3894, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2616, "sent_len_1": 66.8139, "sent_len_max_0": 127.4087, "sent_len_max_1": 188.1637, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.867, "doc_norm": 1.389, "encoder_q-embeddings": 4556.521, "encoder_q-layer.0": 2860.3953, "encoder_q-layer.1": 3004.6851, "encoder_q-layer.10": 4979.8179, "encoder_q-layer.11": 12439.1689, "encoder_q-layer.2": 3339.5308, "encoder_q-layer.3": 3421.5039, "encoder_q-layer.4": 3623.2722, "encoder_q-layer.5": 3421.4634, "encoder_q-layer.6": 3936.9082, "encoder_q-layer.7": 4245.0142, "encoder_q-layer.8": 5059.8418, "encoder_q-layer.9": 4678.0449, "epoch": 0.35, "inbatch_neg_score": 0.2264, "inbatch_pos_score": 0.7803, "learning_rate": 2.6000000000000002e-05, "loss": 3.867, "norm_diff": 0.0779, "norm_loss": 0.0, "num_token_doc": 66.9266, "num_token_overlap": 11.6872, "num_token_query": 31.3684, "num_token_union": 65.1522, "num_word_context": 202.2125, "num_word_doc": 49.9118, "num_word_query": 23.3018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7929.608, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2275, "query_norm": 1.3111, "queue_k_norm": 1.3921, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3684, "sent_len_1": 66.9266, "sent_len_max_0": 127.4762, "sent_len_max_1": 190.7825, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.8586, "doc_norm": 1.3923, "encoder_q-embeddings": 10628.5518, "encoder_q-layer.0": 7050.1895, "encoder_q-layer.1": 7554.7373, "encoder_q-layer.10": 4789.5278, "encoder_q-layer.11": 12623.9844, "encoder_q-layer.2": 8349.1406, "encoder_q-layer.3": 8855.2969, "encoder_q-layer.4": 8137.5405, "encoder_q-layer.5": 7623.1943, "encoder_q-layer.6": 8207.6309, "encoder_q-layer.7": 7971.502, "encoder_q-layer.8": 6644.8594, "encoder_q-layer.9": 4640.7681, "epoch": 0.35, "inbatch_neg_score": 0.2278, "inbatch_pos_score": 0.7788, "learning_rate": 2.5944444444444444e-05, "loss": 3.8586, "norm_diff": 0.0938, "norm_loss": 0.0, "num_token_doc": 66.672, "num_token_overlap": 11.6454, "num_token_query": 31.2585, "num_token_union": 65.0142, "num_word_context": 202.1925, "num_word_doc": 49.7458, "num_word_query": 23.2037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12721.1921, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2268, "query_norm": 1.2984, "queue_k_norm": 1.3904, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2585, "sent_len_1": 66.672, "sent_len_max_0": 127.355, "sent_len_max_1": 189.1163, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8734, "doc_norm": 1.3929, "encoder_q-embeddings": 12172.6533, "encoder_q-layer.0": 8280.7803, "encoder_q-layer.1": 9198.3115, "encoder_q-layer.10": 10199.4072, "encoder_q-layer.11": 25864.1113, "encoder_q-layer.2": 10016.4014, "encoder_q-layer.3": 9830.3564, "encoder_q-layer.4": 10924.0332, "encoder_q-layer.5": 9935.1748, "encoder_q-layer.6": 10947.0391, "encoder_q-layer.7": 10947.7969, "encoder_q-layer.8": 11484.7666, "encoder_q-layer.9": 9790.332, "epoch": 0.35, "inbatch_neg_score": 0.2245, "inbatch_pos_score": 0.771, "learning_rate": 2.588888888888889e-05, "loss": 3.8734, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.8628, "num_token_overlap": 11.7019, "num_token_query": 31.3641, "num_token_union": 65.1071, "num_word_context": 202.5375, "num_word_doc": 49.939, "num_word_query": 23.2902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18641.1055, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2245, "query_norm": 1.2932, "queue_k_norm": 1.3927, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3641, "sent_len_1": 66.8628, "sent_len_max_0": 127.4338, "sent_len_max_1": 188.4238, "stdk": 0.0474, "stdq": 0.0423, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8392, "doc_norm": 1.3891, "encoder_q-embeddings": 23961.0449, "encoder_q-layer.0": 17514.6094, "encoder_q-layer.1": 19863.123, "encoder_q-layer.10": 9758.7119, "encoder_q-layer.11": 24165.375, "encoder_q-layer.2": 25080.6191, "encoder_q-layer.3": 25553.1543, "encoder_q-layer.4": 29837.4766, "encoder_q-layer.5": 30307.3281, "encoder_q-layer.6": 28087.9629, "encoder_q-layer.7": 24349.4922, "encoder_q-layer.8": 15730.6826, "encoder_q-layer.9": 10061.8174, "epoch": 0.35, "inbatch_neg_score": 0.2235, "inbatch_pos_score": 0.7681, "learning_rate": 2.5833333333333336e-05, "loss": 3.8392, "norm_diff": 0.1098, "norm_loss": 0.0, "num_token_doc": 66.7069, "num_token_overlap": 11.6959, "num_token_query": 31.4373, "num_token_union": 65.1132, "num_word_context": 202.6908, "num_word_doc": 49.7902, "num_word_query": 23.3674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34095.6357, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.2233, "query_norm": 1.2792, "queue_k_norm": 1.3933, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4373, "sent_len_1": 66.7069, "sent_len_max_0": 127.4463, "sent_len_max_1": 188.7788, "stdk": 0.0472, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8581, "doc_norm": 1.3896, "encoder_q-embeddings": 5724.1655, "encoder_q-layer.0": 3916.0684, "encoder_q-layer.1": 3844.2942, "encoder_q-layer.10": 4838.2192, "encoder_q-layer.11": 12665.9727, "encoder_q-layer.2": 4253.2036, "encoder_q-layer.3": 4281.2402, "encoder_q-layer.4": 4356.6851, "encoder_q-layer.5": 4369.9829, "encoder_q-layer.6": 4625.8555, "encoder_q-layer.7": 4991.5532, "encoder_q-layer.8": 5618.2114, "encoder_q-layer.9": 4972.7998, "epoch": 0.35, "inbatch_neg_score": 0.223, "inbatch_pos_score": 0.7642, "learning_rate": 2.5777777777777778e-05, "loss": 3.8581, "norm_diff": 0.1074, "norm_loss": 0.0, "num_token_doc": 66.8471, "num_token_overlap": 11.6559, "num_token_query": 31.357, "num_token_union": 65.1876, "num_word_context": 202.2755, "num_word_doc": 49.9222, "num_word_query": 23.2981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8888.3928, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2235, "query_norm": 1.2821, "queue_k_norm": 1.3927, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.357, "sent_len_1": 66.8471, "sent_len_max_0": 127.5387, "sent_len_max_1": 186.7625, "stdk": 0.0473, "stdq": 0.0419, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.8496, "doc_norm": 1.3927, "encoder_q-embeddings": 6154.4492, "encoder_q-layer.0": 4236.5947, "encoder_q-layer.1": 4731.1362, "encoder_q-layer.10": 4619.8008, "encoder_q-layer.11": 12313.2646, "encoder_q-layer.2": 4967.4443, "encoder_q-layer.3": 5245.1245, "encoder_q-layer.4": 5397.8311, "encoder_q-layer.5": 5509.9883, "encoder_q-layer.6": 5488.52, "encoder_q-layer.7": 5972.167, "encoder_q-layer.8": 6303.4229, "encoder_q-layer.9": 4755.3979, "epoch": 0.35, "inbatch_neg_score": 0.2274, "inbatch_pos_score": 0.8018, "learning_rate": 2.5722222222222224e-05, "loss": 3.8496, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.8885, "num_token_overlap": 11.6906, "num_token_query": 31.3693, "num_token_union": 65.1577, "num_word_context": 202.1177, "num_word_doc": 49.9301, "num_word_query": 23.3138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9415.816, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2269, "query_norm": 1.3133, "queue_k_norm": 1.3926, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3693, "sent_len_1": 66.8885, "sent_len_max_0": 127.4237, "sent_len_max_1": 189.2025, "stdk": 0.0474, "stdq": 0.0431, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.8402, "doc_norm": 1.3957, "encoder_q-embeddings": 8155.7837, "encoder_q-layer.0": 5709.4126, "encoder_q-layer.1": 5735.9502, "encoder_q-layer.10": 5637.1104, "encoder_q-layer.11": 13511.3535, "encoder_q-layer.2": 6679.9351, "encoder_q-layer.3": 6930.6406, "encoder_q-layer.4": 6506.7241, "encoder_q-layer.5": 6429.6011, "encoder_q-layer.6": 6036.168, "encoder_q-layer.7": 5645.5254, "encoder_q-layer.8": 6066.9175, "encoder_q-layer.9": 5502.8555, "epoch": 0.35, "inbatch_neg_score": 0.2296, "inbatch_pos_score": 0.7705, "learning_rate": 2.5666666666666666e-05, "loss": 3.8402, "norm_diff": 0.1045, "norm_loss": 0.0, "num_token_doc": 66.6716, "num_token_overlap": 11.683, "num_token_query": 31.4345, "num_token_union": 65.06, "num_word_context": 202.1175, "num_word_doc": 49.7363, "num_word_query": 23.337, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10760.5331, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2292, "query_norm": 1.2912, "queue_k_norm": 1.3918, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4345, "sent_len_1": 66.6716, "sent_len_max_0": 127.4875, "sent_len_max_1": 191.7325, "stdk": 0.0474, "stdq": 0.0421, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8495, "doc_norm": 1.3855, "encoder_q-embeddings": 15467.0078, "encoder_q-layer.0": 10676.1611, "encoder_q-layer.1": 9687.042, "encoder_q-layer.10": 5275.9209, "encoder_q-layer.11": 12841.5879, "encoder_q-layer.2": 11323.9014, "encoder_q-layer.3": 11714.6426, "encoder_q-layer.4": 11342.8799, "encoder_q-layer.5": 11171.4473, "encoder_q-layer.6": 10200.0254, "encoder_q-layer.7": 7578.1528, "encoder_q-layer.8": 7119.5449, "encoder_q-layer.9": 5353.6191, "epoch": 0.35, "inbatch_neg_score": 0.2312, "inbatch_pos_score": 0.7681, "learning_rate": 2.5611111111111115e-05, "loss": 3.8495, "norm_diff": 0.0829, "norm_loss": 0.0, "num_token_doc": 66.9564, "num_token_overlap": 11.6556, "num_token_query": 31.3161, "num_token_union": 65.2317, "num_word_context": 202.6277, "num_word_doc": 49.9591, "num_word_query": 23.2473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15829.6019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2301, "query_norm": 1.3025, "queue_k_norm": 1.3945, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3161, "sent_len_1": 66.9564, "sent_len_max_0": 127.5062, "sent_len_max_1": 188.5838, "stdk": 0.0471, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.837, "doc_norm": 1.4002, "encoder_q-embeddings": 4474.0205, "encoder_q-layer.0": 2958.1499, "encoder_q-layer.1": 3015.0381, "encoder_q-layer.10": 4972.2046, "encoder_q-layer.11": 11538.9043, "encoder_q-layer.2": 3243.9214, "encoder_q-layer.3": 3241.5005, "encoder_q-layer.4": 3261.7571, "encoder_q-layer.5": 3392.3965, "encoder_q-layer.6": 3912.0066, "encoder_q-layer.7": 4224.6479, "encoder_q-layer.8": 4889.8169, "encoder_q-layer.9": 4704.0527, "epoch": 0.35, "inbatch_neg_score": 0.2274, "inbatch_pos_score": 0.7798, "learning_rate": 2.5555555555555554e-05, "loss": 3.837, "norm_diff": 0.1003, "norm_loss": 0.0, "num_token_doc": 66.8377, "num_token_overlap": 11.6689, "num_token_query": 31.383, "num_token_union": 65.1382, "num_word_context": 202.1658, "num_word_doc": 49.8535, "num_word_query": 23.3047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7604.3007, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2275, "query_norm": 1.2999, "queue_k_norm": 1.3949, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.383, "sent_len_1": 66.8377, "sent_len_max_0": 127.6025, "sent_len_max_1": 190.8462, "stdk": 0.0476, "stdq": 0.0427, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.8727, "doc_norm": 1.3946, "encoder_q-embeddings": 11461.8125, "encoder_q-layer.0": 8354.3018, "encoder_q-layer.1": 8947.7236, "encoder_q-layer.10": 5089.1401, "encoder_q-layer.11": 11769.6572, "encoder_q-layer.2": 9682.0664, "encoder_q-layer.3": 9769.0859, "encoder_q-layer.4": 10134.8564, "encoder_q-layer.5": 9301.9082, "encoder_q-layer.6": 11415.6455, "encoder_q-layer.7": 9534.1465, "encoder_q-layer.8": 8818.3623, "encoder_q-layer.9": 5437.7305, "epoch": 0.35, "inbatch_neg_score": 0.2253, "inbatch_pos_score": 0.7803, "learning_rate": 2.5500000000000003e-05, "loss": 3.8727, "norm_diff": 0.0929, "norm_loss": 0.0, "num_token_doc": 66.8402, "num_token_overlap": 11.6803, "num_token_query": 31.3395, "num_token_union": 65.1424, "num_word_context": 202.2037, "num_word_doc": 49.8494, "num_word_query": 23.2736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14453.737, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.225, "query_norm": 1.3017, "queue_k_norm": 1.3949, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3395, "sent_len_1": 66.8402, "sent_len_max_0": 127.425, "sent_len_max_1": 190.3875, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.8716, "doc_norm": 1.3924, "encoder_q-embeddings": 6534.3843, "encoder_q-layer.0": 4487.8813, "encoder_q-layer.1": 4877.3086, "encoder_q-layer.10": 5198.4453, "encoder_q-layer.11": 12219.2412, "encoder_q-layer.2": 5687.1768, "encoder_q-layer.3": 5791.7163, "encoder_q-layer.4": 5654.4629, "encoder_q-layer.5": 5433.1411, "encoder_q-layer.6": 5222.8818, "encoder_q-layer.7": 5287.623, "encoder_q-layer.8": 5577.8887, "encoder_q-layer.9": 4847.9761, "epoch": 0.35, "inbatch_neg_score": 0.2243, "inbatch_pos_score": 0.7505, "learning_rate": 2.5444444444444442e-05, "loss": 3.8716, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.8691, "num_token_overlap": 11.6369, "num_token_query": 31.259, "num_token_union": 65.1805, "num_word_context": 202.3297, "num_word_doc": 49.8971, "num_word_query": 23.2188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9435.4014, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2233, "query_norm": 1.27, "queue_k_norm": 1.396, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.259, "sent_len_1": 66.8691, "sent_len_max_0": 127.4513, "sent_len_max_1": 190.5062, "stdk": 0.0473, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.8538, "doc_norm": 1.3936, "encoder_q-embeddings": 5137.6904, "encoder_q-layer.0": 3492.9976, "encoder_q-layer.1": 3675.1267, "encoder_q-layer.10": 4986.0674, "encoder_q-layer.11": 12370.9775, "encoder_q-layer.2": 4132.4458, "encoder_q-layer.3": 4282.5986, "encoder_q-layer.4": 4342.686, "encoder_q-layer.5": 4027.9595, "encoder_q-layer.6": 4379.2188, "encoder_q-layer.7": 4374.0791, "encoder_q-layer.8": 5356.8257, "encoder_q-layer.9": 4717.6445, "epoch": 0.35, "inbatch_neg_score": 0.2219, "inbatch_pos_score": 0.7817, "learning_rate": 2.538888888888889e-05, "loss": 3.8538, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8061, "num_token_overlap": 11.6892, "num_token_query": 31.3931, "num_token_union": 65.1642, "num_word_context": 202.1029, "num_word_doc": 49.861, "num_word_query": 23.3171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8435.162, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2227, "query_norm": 1.301, "queue_k_norm": 1.3964, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3931, "sent_len_1": 66.8061, "sent_len_max_0": 127.6025, "sent_len_max_1": 188.15, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8615, "doc_norm": 1.395, "encoder_q-embeddings": 3429.8752, "encoder_q-layer.0": 2345.5874, "encoder_q-layer.1": 2572.8914, "encoder_q-layer.10": 2760.3276, "encoder_q-layer.11": 6366.207, "encoder_q-layer.2": 2868.8965, "encoder_q-layer.3": 3274.0549, "encoder_q-layer.4": 3519.9844, "encoder_q-layer.5": 3462.3616, "encoder_q-layer.6": 3279.4365, "encoder_q-layer.7": 3231.4541, "encoder_q-layer.8": 3217.9832, "encoder_q-layer.9": 2501.2771, "epoch": 0.35, "inbatch_neg_score": 0.2277, "inbatch_pos_score": 0.7832, "learning_rate": 2.5333333333333337e-05, "loss": 3.8615, "norm_diff": 0.1057, "norm_loss": 0.0, "num_token_doc": 66.6911, "num_token_overlap": 11.636, "num_token_query": 31.2719, "num_token_union": 65.0061, "num_word_context": 202.3448, "num_word_doc": 49.8058, "num_word_query": 23.2258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5214.9301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2279, "query_norm": 1.2893, "queue_k_norm": 1.3927, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2719, "sent_len_1": 66.6911, "sent_len_max_0": 127.56, "sent_len_max_1": 188.7663, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8586, "doc_norm": 1.398, "encoder_q-embeddings": 2516.9575, "encoder_q-layer.0": 1797.0929, "encoder_q-layer.1": 1963.0305, "encoder_q-layer.10": 2517.001, "encoder_q-layer.11": 6136.6045, "encoder_q-layer.2": 2189.5203, "encoder_q-layer.3": 2215.4102, "encoder_q-layer.4": 2426.7166, "encoder_q-layer.5": 2379.197, "encoder_q-layer.6": 2383.4089, "encoder_q-layer.7": 2643.7295, "encoder_q-layer.8": 2948.9785, "encoder_q-layer.9": 2499.2478, "epoch": 0.35, "inbatch_neg_score": 0.2223, "inbatch_pos_score": 0.7588, "learning_rate": 2.527777777777778e-05, "loss": 3.8586, "norm_diff": 0.1192, "norm_loss": 0.0, "num_token_doc": 66.849, "num_token_overlap": 11.6615, "num_token_query": 31.3453, "num_token_union": 65.1709, "num_word_context": 202.3427, "num_word_doc": 49.9, "num_word_query": 23.2908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4321.783, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2225, "query_norm": 1.2788, "queue_k_norm": 1.3941, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3453, "sent_len_1": 66.849, "sent_len_max_0": 127.5588, "sent_len_max_1": 189.3113, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8326, "doc_norm": 1.3956, "encoder_q-embeddings": 7197.5039, "encoder_q-layer.0": 5036.0298, "encoder_q-layer.1": 5371.7944, "encoder_q-layer.10": 2392.0303, "encoder_q-layer.11": 6049.979, "encoder_q-layer.2": 6851.9404, "encoder_q-layer.3": 6912.5366, "encoder_q-layer.4": 7102.6768, "encoder_q-layer.5": 6450.3579, "encoder_q-layer.6": 6109.1367, "encoder_q-layer.7": 5077.8442, "encoder_q-layer.8": 4269.6621, "encoder_q-layer.9": 2874.7917, "epoch": 0.36, "inbatch_neg_score": 0.222, "inbatch_pos_score": 0.7822, "learning_rate": 2.5222222222222225e-05, "loss": 3.8326, "norm_diff": 0.1023, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 11.6696, "num_token_query": 31.3448, "num_token_union": 65.0347, "num_word_context": 202.1716, "num_word_doc": 49.7159, "num_word_query": 23.2925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8687.4585, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2218, "query_norm": 1.2934, "queue_k_norm": 1.3957, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3448, "sent_len_1": 66.6342, "sent_len_max_0": 127.4437, "sent_len_max_1": 191.6125, "stdk": 0.0474, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.8552, "doc_norm": 1.3906, "encoder_q-embeddings": 2191.7581, "encoder_q-layer.0": 1440.9496, "encoder_q-layer.1": 1485.9932, "encoder_q-layer.10": 2444.2119, "encoder_q-layer.11": 6010.1201, "encoder_q-layer.2": 1609.7623, "encoder_q-layer.3": 1652.1508, "encoder_q-layer.4": 1745.4438, "encoder_q-layer.5": 1744.2594, "encoder_q-layer.6": 1949.0524, "encoder_q-layer.7": 2250.4387, "encoder_q-layer.8": 2729.6023, "encoder_q-layer.9": 2476.416, "epoch": 0.36, "inbatch_neg_score": 0.2264, "inbatch_pos_score": 0.7803, "learning_rate": 2.5166666666666667e-05, "loss": 3.8552, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.8313, "num_token_overlap": 11.6775, "num_token_query": 31.3718, "num_token_union": 65.1426, "num_word_context": 202.5272, "num_word_doc": 49.8935, "num_word_query": 23.3039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3968.27, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2263, "query_norm": 1.301, "queue_k_norm": 1.3961, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3718, "sent_len_1": 66.8313, "sent_len_max_0": 127.49, "sent_len_max_1": 190.365, "stdk": 0.0473, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8572, "doc_norm": 1.399, "encoder_q-embeddings": 3033.4844, "encoder_q-layer.0": 2092.8916, "encoder_q-layer.1": 2168.0466, "encoder_q-layer.10": 2543.5818, "encoder_q-layer.11": 6292.6226, "encoder_q-layer.2": 2502.6677, "encoder_q-layer.3": 2578.1404, "encoder_q-layer.4": 2705.4294, "encoder_q-layer.5": 2858.717, "encoder_q-layer.6": 2741.8706, "encoder_q-layer.7": 2869.5229, "encoder_q-layer.8": 2803.5557, "encoder_q-layer.9": 2473.9956, "epoch": 0.36, "inbatch_neg_score": 0.2289, "inbatch_pos_score": 0.7866, "learning_rate": 2.5111111111111113e-05, "loss": 3.8572, "norm_diff": 0.097, "norm_loss": 0.0, "num_token_doc": 66.8489, "num_token_overlap": 11.661, "num_token_query": 31.4086, "num_token_union": 65.192, "num_word_context": 202.2131, "num_word_doc": 49.902, "num_word_query": 23.3388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4641.9756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2296, "query_norm": 1.3021, "queue_k_norm": 1.3935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4086, "sent_len_1": 66.8489, "sent_len_max_0": 127.4762, "sent_len_max_1": 188.1312, "stdk": 0.0475, "stdq": 0.0426, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8484, "doc_norm": 1.3941, "encoder_q-embeddings": 2403.6865, "encoder_q-layer.0": 1763.752, "encoder_q-layer.1": 1838.4265, "encoder_q-layer.10": 2554.7014, "encoder_q-layer.11": 6160.0693, "encoder_q-layer.2": 1937.1597, "encoder_q-layer.3": 1929.2446, "encoder_q-layer.4": 2042.3225, "encoder_q-layer.5": 2137.8704, "encoder_q-layer.6": 2280.49, "encoder_q-layer.7": 2414.6709, "encoder_q-layer.8": 2580.0737, "encoder_q-layer.9": 2354.2639, "epoch": 0.36, "inbatch_neg_score": 0.2304, "inbatch_pos_score": 0.79, "learning_rate": 2.5055555555555555e-05, "loss": 3.8484, "norm_diff": 0.0989, "norm_loss": 0.0, "num_token_doc": 66.7242, "num_token_overlap": 11.7087, "num_token_query": 31.4388, "num_token_union": 65.0991, "num_word_context": 202.0955, "num_word_doc": 49.8111, "num_word_query": 23.35, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4188.9475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.2952, "queue_k_norm": 1.3951, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4388, "sent_len_1": 66.7242, "sent_len_max_0": 127.46, "sent_len_max_1": 187.785, "stdk": 0.0474, "stdq": 0.0423, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8604, "doc_norm": 1.4045, "encoder_q-embeddings": 2138.7209, "encoder_q-layer.0": 1454.5247, "encoder_q-layer.1": 1559.2905, "encoder_q-layer.10": 2772.6194, "encoder_q-layer.11": 6062.6543, "encoder_q-layer.2": 1729.7852, "encoder_q-layer.3": 1775.8846, "encoder_q-layer.4": 1833.1599, "encoder_q-layer.5": 1832.6913, "encoder_q-layer.6": 2059.1887, "encoder_q-layer.7": 2313.9043, "encoder_q-layer.8": 2780.8137, "encoder_q-layer.9": 2532.2371, "epoch": 0.36, "inbatch_neg_score": 0.2285, "inbatch_pos_score": 0.7842, "learning_rate": 2.5e-05, "loss": 3.8604, "norm_diff": 0.1103, "norm_loss": 0.0, "num_token_doc": 66.9717, "num_token_overlap": 11.6802, "num_token_query": 31.4074, "num_token_union": 65.2474, "num_word_context": 202.8136, "num_word_doc": 49.9454, "num_word_query": 23.3385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3987.3318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2289, "query_norm": 1.2943, "queue_k_norm": 1.3928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4074, "sent_len_1": 66.9717, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.84, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8342, "doc_norm": 1.3958, "encoder_q-embeddings": 2464.5347, "encoder_q-layer.0": 1575.3367, "encoder_q-layer.1": 1698.8887, "encoder_q-layer.10": 2777.7168, "encoder_q-layer.11": 6433.248, "encoder_q-layer.2": 1883.3077, "encoder_q-layer.3": 1988.8538, "encoder_q-layer.4": 2083.9163, "encoder_q-layer.5": 2125.4087, "encoder_q-layer.6": 2403.7485, "encoder_q-layer.7": 2523.9585, "encoder_q-layer.8": 3138.1726, "encoder_q-layer.9": 2782.8027, "epoch": 0.36, "inbatch_neg_score": 0.2307, "inbatch_pos_score": 0.7852, "learning_rate": 2.4944444444444447e-05, "loss": 3.8342, "norm_diff": 0.1, "norm_loss": 0.0, "num_token_doc": 66.5671, "num_token_overlap": 11.6434, "num_token_query": 31.3016, "num_token_union": 64.9794, "num_word_context": 201.9634, "num_word_doc": 49.6643, "num_word_query": 23.2393, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4333.3906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2307, "query_norm": 1.2958, "queue_k_norm": 1.3942, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3016, "sent_len_1": 66.5671, "sent_len_max_0": 127.5212, "sent_len_max_1": 188.455, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8368, "doc_norm": 1.3977, "encoder_q-embeddings": 3078.1521, "encoder_q-layer.0": 2102.8521, "encoder_q-layer.1": 2216.272, "encoder_q-layer.10": 2560.4851, "encoder_q-layer.11": 6336.0225, "encoder_q-layer.2": 2522.4524, "encoder_q-layer.3": 2436.0405, "encoder_q-layer.4": 2433.0278, "encoder_q-layer.5": 2421.5579, "encoder_q-layer.6": 2584.6978, "encoder_q-layer.7": 2766.7217, "encoder_q-layer.8": 2877.7388, "encoder_q-layer.9": 2550.0667, "epoch": 0.36, "inbatch_neg_score": 0.2338, "inbatch_pos_score": 0.7896, "learning_rate": 2.488888888888889e-05, "loss": 3.8368, "norm_diff": 0.1001, "norm_loss": 0.0, "num_token_doc": 66.9325, "num_token_overlap": 11.7105, "num_token_query": 31.4148, "num_token_union": 65.1932, "num_word_context": 202.2583, "num_word_doc": 49.9518, "num_word_query": 23.3437, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4604.0958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.2975, "queue_k_norm": 1.3955, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4148, "sent_len_1": 66.9325, "sent_len_max_0": 127.3912, "sent_len_max_1": 190.1662, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8493, "doc_norm": 1.3956, "encoder_q-embeddings": 2086.2629, "encoder_q-layer.0": 1373.4579, "encoder_q-layer.1": 1383.4747, "encoder_q-layer.10": 2663.0474, "encoder_q-layer.11": 6083.1187, "encoder_q-layer.2": 1575.0504, "encoder_q-layer.3": 1568.7627, "encoder_q-layer.4": 1670.074, "encoder_q-layer.5": 1685.4039, "encoder_q-layer.6": 1905.4857, "encoder_q-layer.7": 2155.0552, "encoder_q-layer.8": 2711.5732, "encoder_q-layer.9": 2468.8589, "epoch": 0.36, "inbatch_neg_score": 0.2348, "inbatch_pos_score": 0.7939, "learning_rate": 2.4833333333333335e-05, "loss": 3.8493, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.8344, "num_token_overlap": 11.6443, "num_token_query": 31.3387, "num_token_union": 65.1618, "num_word_context": 202.61, "num_word_doc": 49.8885, "num_word_query": 23.2847, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3922.5294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.234, "query_norm": 1.2988, "queue_k_norm": 1.3962, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3387, "sent_len_1": 66.8344, "sent_len_max_0": 127.47, "sent_len_max_1": 188.455, "stdk": 0.0474, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.831, "doc_norm": 1.4049, "encoder_q-embeddings": 2492.3372, "encoder_q-layer.0": 1772.7279, "encoder_q-layer.1": 1899.4476, "encoder_q-layer.10": 2669.4937, "encoder_q-layer.11": 6595.5762, "encoder_q-layer.2": 2218.3372, "encoder_q-layer.3": 2426.1873, "encoder_q-layer.4": 2772.7073, "encoder_q-layer.5": 2565.5325, "encoder_q-layer.6": 2572.6277, "encoder_q-layer.7": 2452.0261, "encoder_q-layer.8": 2669.5312, "encoder_q-layer.9": 2510.9285, "epoch": 0.36, "inbatch_neg_score": 0.2342, "inbatch_pos_score": 0.7925, "learning_rate": 2.477777777777778e-05, "loss": 3.831, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 67.0235, "num_token_overlap": 11.7328, "num_token_query": 31.4531, "num_token_union": 65.335, "num_word_context": 202.8481, "num_word_doc": 50.0378, "num_word_query": 23.3556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4425.137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2335, "query_norm": 1.2867, "queue_k_norm": 1.3974, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4531, "sent_len_1": 67.0235, "sent_len_max_0": 127.4038, "sent_len_max_1": 188.6138, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.8491, "doc_norm": 1.3968, "encoder_q-embeddings": 2175.0637, "encoder_q-layer.0": 1467.6538, "encoder_q-layer.1": 1514.4929, "encoder_q-layer.10": 2489.9534, "encoder_q-layer.11": 6487.4956, "encoder_q-layer.2": 1688.4995, "encoder_q-layer.3": 1742.8898, "encoder_q-layer.4": 1772.6652, "encoder_q-layer.5": 1766.7697, "encoder_q-layer.6": 2008.9454, "encoder_q-layer.7": 2214.5271, "encoder_q-layer.8": 2553.0344, "encoder_q-layer.9": 2365.4624, "epoch": 0.36, "inbatch_neg_score": 0.2355, "inbatch_pos_score": 0.7749, "learning_rate": 2.4722222222222223e-05, "loss": 3.8491, "norm_diff": 0.1125, "norm_loss": 0.0, "num_token_doc": 66.8312, "num_token_overlap": 11.7007, "num_token_query": 31.3818, "num_token_union": 65.1412, "num_word_context": 202.7323, "num_word_doc": 49.8589, "num_word_query": 23.3212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4054.8024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2341, "query_norm": 1.2844, "queue_k_norm": 1.3959, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3818, "sent_len_1": 66.8312, "sent_len_max_0": 127.6375, "sent_len_max_1": 191.7163, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.8404, "doc_norm": 1.3895, "encoder_q-embeddings": 2338.3284, "encoder_q-layer.0": 1561.7053, "encoder_q-layer.1": 1699.0282, "encoder_q-layer.10": 2380.2756, "encoder_q-layer.11": 5889.356, "encoder_q-layer.2": 1894.0835, "encoder_q-layer.3": 1820.6407, "encoder_q-layer.4": 1826.899, "encoder_q-layer.5": 1791.2522, "encoder_q-layer.6": 1984.0425, "encoder_q-layer.7": 2144.3953, "encoder_q-layer.8": 2407.2446, "encoder_q-layer.9": 2264.4631, "epoch": 0.36, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.7793, "learning_rate": 2.466666666666667e-05, "loss": 3.8404, "norm_diff": 0.1116, "norm_loss": 0.0, "num_token_doc": 66.5719, "num_token_overlap": 11.6695, "num_token_query": 31.3562, "num_token_union": 65.0391, "num_word_context": 202.149, "num_word_doc": 49.7006, "num_word_query": 23.2882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3920.5466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2328, "query_norm": 1.2779, "queue_k_norm": 1.3981, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3562, "sent_len_1": 66.5719, "sent_len_max_0": 127.4275, "sent_len_max_1": 188.0625, "stdk": 0.0472, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8493, "doc_norm": 1.3942, "encoder_q-embeddings": 2073.7949, "encoder_q-layer.0": 1318.1156, "encoder_q-layer.1": 1378.842, "encoder_q-layer.10": 2582.2222, "encoder_q-layer.11": 6257.8354, "encoder_q-layer.2": 1538.9629, "encoder_q-layer.3": 1615.9242, "encoder_q-layer.4": 1659.5884, "encoder_q-layer.5": 1656.5764, "encoder_q-layer.6": 1864.2097, "encoder_q-layer.7": 2136.2441, "encoder_q-layer.8": 2483.312, "encoder_q-layer.9": 2303.4355, "epoch": 0.36, "inbatch_neg_score": 0.2325, "inbatch_pos_score": 0.7798, "learning_rate": 2.461111111111111e-05, "loss": 3.8493, "norm_diff": 0.1134, "norm_loss": 0.0, "num_token_doc": 66.8778, "num_token_overlap": 11.6911, "num_token_query": 31.3104, "num_token_union": 65.1232, "num_word_context": 202.3771, "num_word_doc": 49.8836, "num_word_query": 23.2602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3915.3181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.2807, "queue_k_norm": 1.3977, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3104, "sent_len_1": 66.8778, "sent_len_max_0": 127.565, "sent_len_max_1": 190.6238, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8427, "doc_norm": 1.3966, "encoder_q-embeddings": 2103.7205, "encoder_q-layer.0": 1407.1068, "encoder_q-layer.1": 1462.2076, "encoder_q-layer.10": 2536.3691, "encoder_q-layer.11": 6285.7202, "encoder_q-layer.2": 1703.6027, "encoder_q-layer.3": 1710.7429, "encoder_q-layer.4": 1795.0078, "encoder_q-layer.5": 1871.0857, "encoder_q-layer.6": 2024.6683, "encoder_q-layer.7": 2349.0476, "encoder_q-layer.8": 2688.4797, "encoder_q-layer.9": 2513.4504, "epoch": 0.36, "inbatch_neg_score": 0.2383, "inbatch_pos_score": 0.7891, "learning_rate": 2.4555555555555557e-05, "loss": 3.8427, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 66.8095, "num_token_overlap": 11.6271, "num_token_query": 31.2817, "num_token_union": 65.1137, "num_word_context": 202.4596, "num_word_doc": 49.8258, "num_word_query": 23.224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4005.9728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.3006, "queue_k_norm": 1.398, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2817, "sent_len_1": 66.8095, "sent_len_max_0": 127.3662, "sent_len_max_1": 190.4025, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.8191, "doc_norm": 1.4008, "encoder_q-embeddings": 2297.5134, "encoder_q-layer.0": 1446.6013, "encoder_q-layer.1": 1517.4496, "encoder_q-layer.10": 2304.426, "encoder_q-layer.11": 5821.502, "encoder_q-layer.2": 1744.171, "encoder_q-layer.3": 1789.483, "encoder_q-layer.4": 1916.7289, "encoder_q-layer.5": 1996.7565, "encoder_q-layer.6": 2154.9934, "encoder_q-layer.7": 2278.6804, "encoder_q-layer.8": 2566.0171, "encoder_q-layer.9": 2222.4009, "epoch": 0.36, "inbatch_neg_score": 0.236, "inbatch_pos_score": 0.8125, "learning_rate": 2.45e-05, "loss": 3.8191, "norm_diff": 0.1008, "norm_loss": 0.0, "num_token_doc": 66.8238, "num_token_overlap": 11.6912, "num_token_query": 31.4613, "num_token_union": 65.2037, "num_word_context": 202.6729, "num_word_doc": 49.8396, "num_word_query": 23.3583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3971.5599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2362, "query_norm": 1.3001, "queue_k_norm": 1.3989, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4613, "sent_len_1": 66.8238, "sent_len_max_0": 127.5125, "sent_len_max_1": 189.4888, "stdk": 0.0476, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.8293, "doc_norm": 1.399, "encoder_q-embeddings": 2139.748, "encoder_q-layer.0": 1381.0872, "encoder_q-layer.1": 1436.2727, "encoder_q-layer.10": 2378.6833, "encoder_q-layer.11": 6005.208, "encoder_q-layer.2": 1586.9698, "encoder_q-layer.3": 1659.1189, "encoder_q-layer.4": 1748.4905, "encoder_q-layer.5": 1798.2607, "encoder_q-layer.6": 1951.1044, "encoder_q-layer.7": 2296.634, "encoder_q-layer.8": 2651.4395, "encoder_q-layer.9": 2393.0371, "epoch": 0.36, "inbatch_neg_score": 0.2352, "inbatch_pos_score": 0.7954, "learning_rate": 2.4444444444444445e-05, "loss": 3.8293, "norm_diff": 0.1031, "norm_loss": 0.0, "num_token_doc": 66.6403, "num_token_overlap": 11.6671, "num_token_query": 31.3675, "num_token_union": 65.0363, "num_word_context": 202.3402, "num_word_doc": 49.7357, "num_word_query": 23.2985, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3939.1957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2361, "query_norm": 1.2959, "queue_k_norm": 1.3966, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3675, "sent_len_1": 66.6403, "sent_len_max_0": 127.5375, "sent_len_max_1": 187.8887, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.831, "doc_norm": 1.4049, "encoder_q-embeddings": 2394.1116, "encoder_q-layer.0": 1609.4219, "encoder_q-layer.1": 1726.6476, "encoder_q-layer.10": 2703.313, "encoder_q-layer.11": 6232.0889, "encoder_q-layer.2": 1880.124, "encoder_q-layer.3": 1870.5222, "encoder_q-layer.4": 1904.8728, "encoder_q-layer.5": 1878.1086, "encoder_q-layer.6": 2146.6208, "encoder_q-layer.7": 2394.4482, "encoder_q-layer.8": 2669.2292, "encoder_q-layer.9": 2533.5808, "epoch": 0.37, "inbatch_neg_score": 0.2342, "inbatch_pos_score": 0.8135, "learning_rate": 2.4388888888888887e-05, "loss": 3.831, "norm_diff": 0.1077, "norm_loss": 0.0, "num_token_doc": 66.9219, "num_token_overlap": 11.7019, "num_token_query": 31.4674, "num_token_union": 65.2215, "num_word_context": 202.318, "num_word_doc": 49.9042, "num_word_query": 23.3397, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4118.585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2352, "query_norm": 1.2971, "queue_k_norm": 1.3997, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4674, "sent_len_1": 66.9219, "sent_len_max_0": 127.28, "sent_len_max_1": 190.9238, "stdk": 0.0477, "stdq": 0.0426, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8174, "doc_norm": 1.3935, "encoder_q-embeddings": 4452.0264, "encoder_q-layer.0": 3207.9182, "encoder_q-layer.1": 3633.5205, "encoder_q-layer.10": 2655.533, "encoder_q-layer.11": 6223.4014, "encoder_q-layer.2": 4116.0005, "encoder_q-layer.3": 4614.8652, "encoder_q-layer.4": 5066.2373, "encoder_q-layer.5": 4342.311, "encoder_q-layer.6": 4599.6582, "encoder_q-layer.7": 4314.355, "encoder_q-layer.8": 2827.4275, "encoder_q-layer.9": 2525.9988, "epoch": 0.37, "inbatch_neg_score": 0.2421, "inbatch_pos_score": 0.7886, "learning_rate": 2.4333333333333336e-05, "loss": 3.8174, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.8391, "num_token_overlap": 11.7513, "num_token_query": 31.5891, "num_token_union": 65.238, "num_word_context": 202.6569, "num_word_doc": 49.8977, "num_word_query": 23.4844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6257.887, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2422, "query_norm": 1.299, "queue_k_norm": 1.3992, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5891, "sent_len_1": 66.8391, "sent_len_max_0": 127.5787, "sent_len_max_1": 188.5163, "stdk": 0.0473, "stdq": 0.0424, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8278, "doc_norm": 1.4013, "encoder_q-embeddings": 3239.104, "encoder_q-layer.0": 2217.9136, "encoder_q-layer.1": 2516.5688, "encoder_q-layer.10": 2515.2295, "encoder_q-layer.11": 6163.9385, "encoder_q-layer.2": 2950.6003, "encoder_q-layer.3": 3140.8662, "encoder_q-layer.4": 3379.0354, "encoder_q-layer.5": 3520.5425, "encoder_q-layer.6": 3467.9243, "encoder_q-layer.7": 3118.4556, "encoder_q-layer.8": 3172.7859, "encoder_q-layer.9": 2471.8049, "epoch": 0.37, "inbatch_neg_score": 0.2413, "inbatch_pos_score": 0.8071, "learning_rate": 2.427777777777778e-05, "loss": 3.8278, "norm_diff": 0.0964, "norm_loss": 0.0, "num_token_doc": 67.0667, "num_token_overlap": 11.7184, "num_token_query": 31.4091, "num_token_union": 65.3087, "num_word_context": 202.3291, "num_word_doc": 50.0191, "num_word_query": 23.3267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5065.3868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2418, "query_norm": 1.3049, "queue_k_norm": 1.4001, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4091, "sent_len_1": 67.0667, "sent_len_max_0": 127.37, "sent_len_max_1": 189.9663, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8415, "doc_norm": 1.3968, "encoder_q-embeddings": 5368.854, "encoder_q-layer.0": 3657.4736, "encoder_q-layer.1": 3985.7207, "encoder_q-layer.10": 4764.5688, "encoder_q-layer.11": 11880.0791, "encoder_q-layer.2": 4237.5093, "encoder_q-layer.3": 4409.2705, "encoder_q-layer.4": 4774.9819, "encoder_q-layer.5": 4534.4517, "encoder_q-layer.6": 5152.4077, "encoder_q-layer.7": 4994.373, "encoder_q-layer.8": 5450.563, "encoder_q-layer.9": 4769.5859, "epoch": 0.37, "inbatch_neg_score": 0.241, "inbatch_pos_score": 0.8086, "learning_rate": 2.4222222222222224e-05, "loss": 3.8415, "norm_diff": 0.0817, "norm_loss": 0.0, "num_token_doc": 66.9526, "num_token_overlap": 11.6386, "num_token_query": 31.296, "num_token_union": 65.2148, "num_word_context": 202.5427, "num_word_doc": 49.9171, "num_word_query": 23.2324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8476.4976, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2415, "query_norm": 1.3151, "queue_k_norm": 1.4007, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.296, "sent_len_1": 66.9526, "sent_len_max_0": 127.5025, "sent_len_max_1": 190.71, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8325, "doc_norm": 1.4015, "encoder_q-embeddings": 4542.3667, "encoder_q-layer.0": 2953.2681, "encoder_q-layer.1": 3104.4231, "encoder_q-layer.10": 5176.1963, "encoder_q-layer.11": 12626.168, "encoder_q-layer.2": 3410.6331, "encoder_q-layer.3": 3417.8391, "encoder_q-layer.4": 3643.5186, "encoder_q-layer.5": 3794.7952, "encoder_q-layer.6": 4235.1235, "encoder_q-layer.7": 4609.5649, "encoder_q-layer.8": 5417.2861, "encoder_q-layer.9": 4754.3457, "epoch": 0.37, "inbatch_neg_score": 0.2399, "inbatch_pos_score": 0.7993, "learning_rate": 2.4166666666666667e-05, "loss": 3.8325, "norm_diff": 0.0919, "norm_loss": 0.0, "num_token_doc": 66.7649, "num_token_overlap": 11.6278, "num_token_query": 31.2775, "num_token_union": 65.0818, "num_word_context": 202.0487, "num_word_doc": 49.7693, "num_word_query": 23.2224, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8111.5269, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2396, "query_norm": 1.3097, "queue_k_norm": 1.3998, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2775, "sent_len_1": 66.7649, "sent_len_max_0": 127.4537, "sent_len_max_1": 190.0975, "stdk": 0.0475, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.8316, "doc_norm": 1.4093, "encoder_q-embeddings": 22537.4316, "encoder_q-layer.0": 15496.1182, "encoder_q-layer.1": 18049.1699, "encoder_q-layer.10": 5581.9473, "encoder_q-layer.11": 12421.8066, "encoder_q-layer.2": 17284.5664, "encoder_q-layer.3": 17168.2344, "encoder_q-layer.4": 14079.9795, "encoder_q-layer.5": 10831.3369, "encoder_q-layer.6": 8003.3237, "encoder_q-layer.7": 5978.9839, "encoder_q-layer.8": 6467.8467, "encoder_q-layer.9": 5850.0474, "epoch": 0.37, "inbatch_neg_score": 0.2426, "inbatch_pos_score": 0.8379, "learning_rate": 2.4111111111111113e-05, "loss": 3.8316, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.8313, "num_token_overlap": 11.6829, "num_token_query": 31.4462, "num_token_union": 65.2096, "num_word_context": 202.5366, "num_word_doc": 49.8971, "num_word_query": 23.3625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21124.71, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2434, "query_norm": 1.3141, "queue_k_norm": 1.4016, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4462, "sent_len_1": 66.8313, "sent_len_max_0": 127.5812, "sent_len_max_1": 188.46, "stdk": 0.0478, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.839, "doc_norm": 1.3984, "encoder_q-embeddings": 5008.4814, "encoder_q-layer.0": 3408.626, "encoder_q-layer.1": 3624.6421, "encoder_q-layer.10": 4785.9062, "encoder_q-layer.11": 11958.8359, "encoder_q-layer.2": 3875.8579, "encoder_q-layer.3": 4033.0447, "encoder_q-layer.4": 4129.9087, "encoder_q-layer.5": 3996.446, "encoder_q-layer.6": 4747.8223, "encoder_q-layer.7": 4747.3057, "encoder_q-layer.8": 4952.0518, "encoder_q-layer.9": 4832.3135, "epoch": 0.37, "inbatch_neg_score": 0.2443, "inbatch_pos_score": 0.8037, "learning_rate": 2.4055555555555555e-05, "loss": 3.839, "norm_diff": 0.081, "norm_loss": 0.0, "num_token_doc": 66.8366, "num_token_overlap": 11.6579, "num_token_query": 31.3533, "num_token_union": 65.1877, "num_word_context": 202.1715, "num_word_doc": 49.8882, "num_word_query": 23.2973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8181.3656, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2444, "query_norm": 1.3174, "queue_k_norm": 1.4017, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3533, "sent_len_1": 66.8366, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.0412, "stdk": 0.0474, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.8302, "doc_norm": 1.4021, "encoder_q-embeddings": 4186.1206, "encoder_q-layer.0": 2819.2454, "encoder_q-layer.1": 2923.6348, "encoder_q-layer.10": 4778.623, "encoder_q-layer.11": 11989.1465, "encoder_q-layer.2": 3248.532, "encoder_q-layer.3": 3314.4998, "encoder_q-layer.4": 3386.9573, "encoder_q-layer.5": 3481.0876, "encoder_q-layer.6": 3962.782, "encoder_q-layer.7": 4245.0103, "encoder_q-layer.8": 4875.0078, "encoder_q-layer.9": 4629.4619, "epoch": 0.37, "inbatch_neg_score": 0.2462, "inbatch_pos_score": 0.8032, "learning_rate": 2.4e-05, "loss": 3.8302, "norm_diff": 0.0921, "norm_loss": 0.0, "num_token_doc": 66.8254, "num_token_overlap": 11.7028, "num_token_query": 31.4588, "num_token_union": 65.15, "num_word_context": 202.5516, "num_word_doc": 49.8745, "num_word_query": 23.3671, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7741.2706, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2462, "query_norm": 1.31, "queue_k_norm": 1.4037, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4588, "sent_len_1": 66.8254, "sent_len_max_0": 127.6112, "sent_len_max_1": 190.6887, "stdk": 0.0476, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.833, "doc_norm": 1.4024, "encoder_q-embeddings": 5131.1909, "encoder_q-layer.0": 3451.8518, "encoder_q-layer.1": 3687.4187, "encoder_q-layer.10": 4664.7134, "encoder_q-layer.11": 12045.0107, "encoder_q-layer.2": 4225.23, "encoder_q-layer.3": 4491.4253, "encoder_q-layer.4": 4838.5493, "encoder_q-layer.5": 4984.5234, "encoder_q-layer.6": 5515.5566, "encoder_q-layer.7": 4985.3477, "encoder_q-layer.8": 4939.9805, "encoder_q-layer.9": 4314.9434, "epoch": 0.37, "inbatch_neg_score": 0.2444, "inbatch_pos_score": 0.7949, "learning_rate": 2.3944444444444443e-05, "loss": 3.833, "norm_diff": 0.0982, "norm_loss": 0.0, "num_token_doc": 66.7776, "num_token_overlap": 11.6406, "num_token_query": 31.3078, "num_token_union": 65.1187, "num_word_context": 202.3134, "num_word_doc": 49.8327, "num_word_query": 23.2483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8529.7603, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2455, "query_norm": 1.3042, "queue_k_norm": 1.4046, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3078, "sent_len_1": 66.7776, "sent_len_max_0": 127.475, "sent_len_max_1": 187.565, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8614, "doc_norm": 1.4006, "encoder_q-embeddings": 2235.7388, "encoder_q-layer.0": 1556.286, "encoder_q-layer.1": 1581.6914, "encoder_q-layer.10": 2475.7722, "encoder_q-layer.11": 5778.2876, "encoder_q-layer.2": 1752.6432, "encoder_q-layer.3": 1865.1809, "encoder_q-layer.4": 1938.5896, "encoder_q-layer.5": 2073.1802, "encoder_q-layer.6": 2252.7815, "encoder_q-layer.7": 2341.145, "encoder_q-layer.8": 2524.1169, "encoder_q-layer.9": 2217.1875, "epoch": 0.37, "inbatch_neg_score": 0.2473, "inbatch_pos_score": 0.8066, "learning_rate": 2.3888888888888892e-05, "loss": 3.8614, "norm_diff": 0.0945, "norm_loss": 0.0, "num_token_doc": 66.7275, "num_token_overlap": 11.6349, "num_token_query": 31.2464, "num_token_union": 64.9663, "num_word_context": 202.2861, "num_word_doc": 49.7715, "num_word_query": 23.2063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3944.4575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2467, "query_norm": 1.3061, "queue_k_norm": 1.4029, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2464, "sent_len_1": 66.7275, "sent_len_max_0": 127.5888, "sent_len_max_1": 189.1712, "stdk": 0.0474, "stdq": 0.0421, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.8261, "doc_norm": 1.3966, "encoder_q-embeddings": 2341.0859, "encoder_q-layer.0": 1590.2911, "encoder_q-layer.1": 1716.2794, "encoder_q-layer.10": 2529.415, "encoder_q-layer.11": 6087.0303, "encoder_q-layer.2": 1963.6074, "encoder_q-layer.3": 2025.9387, "encoder_q-layer.4": 2051.2676, "encoder_q-layer.5": 2108.5952, "encoder_q-layer.6": 2258.8313, "encoder_q-layer.7": 2400.7834, "encoder_q-layer.8": 2528.8545, "encoder_q-layer.9": 2316.2458, "epoch": 0.37, "inbatch_neg_score": 0.2533, "inbatch_pos_score": 0.7969, "learning_rate": 2.3833333333333334e-05, "loss": 3.8261, "norm_diff": 0.0851, "norm_loss": 0.0, "num_token_doc": 66.7204, "num_token_overlap": 11.6412, "num_token_query": 31.2668, "num_token_union": 65.0723, "num_word_context": 202.4817, "num_word_doc": 49.8273, "num_word_query": 23.217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4067.4494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2515, "query_norm": 1.3114, "queue_k_norm": 1.4048, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2668, "sent_len_1": 66.7204, "sent_len_max_0": 127.41, "sent_len_max_1": 189.565, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8135, "doc_norm": 1.406, "encoder_q-embeddings": 1731.4183, "encoder_q-layer.0": 1167.3395, "encoder_q-layer.1": 1311.28, "encoder_q-layer.10": 1321.8608, "encoder_q-layer.11": 2919.5996, "encoder_q-layer.2": 1608.4705, "encoder_q-layer.3": 1713.452, "encoder_q-layer.4": 1790.8224, "encoder_q-layer.5": 1702.746, "encoder_q-layer.6": 1556.0216, "encoder_q-layer.7": 1500.0948, "encoder_q-layer.8": 1410.9617, "encoder_q-layer.9": 1174.7494, "epoch": 0.37, "inbatch_neg_score": 0.247, "inbatch_pos_score": 0.8193, "learning_rate": 2.377777777777778e-05, "loss": 3.8135, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 67.1311, "num_token_overlap": 11.7341, "num_token_query": 31.5427, "num_token_union": 65.3792, "num_word_context": 202.4737, "num_word_doc": 50.0857, "num_word_query": 23.4511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2504.8968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2473, "query_norm": 1.3296, "queue_k_norm": 1.4052, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5427, "sent_len_1": 67.1311, "sent_len_max_0": 127.1513, "sent_len_max_1": 189.8837, "stdk": 0.0476, "stdq": 0.0431, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.8165, "doc_norm": 1.4025, "encoder_q-embeddings": 1772.6212, "encoder_q-layer.0": 1181.9149, "encoder_q-layer.1": 1252.6626, "encoder_q-layer.10": 1263.8054, "encoder_q-layer.11": 2975.4011, "encoder_q-layer.2": 1491.777, "encoder_q-layer.3": 1648.0269, "encoder_q-layer.4": 1756.7069, "encoder_q-layer.5": 1753.7234, "encoder_q-layer.6": 1812.4204, "encoder_q-layer.7": 1709.8088, "encoder_q-layer.8": 1610.952, "encoder_q-layer.9": 1214.0782, "epoch": 0.37, "inbatch_neg_score": 0.2527, "inbatch_pos_score": 0.8276, "learning_rate": 2.3722222222222222e-05, "loss": 3.8165, "norm_diff": 0.0891, "norm_loss": 0.0, "num_token_doc": 67.0202, "num_token_overlap": 11.6766, "num_token_query": 31.3425, "num_token_union": 65.2232, "num_word_context": 203.0142, "num_word_doc": 50.0256, "num_word_query": 23.2873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2585.9699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.252, "query_norm": 1.3133, "queue_k_norm": 1.4047, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3425, "sent_len_1": 67.0202, "sent_len_max_0": 127.6737, "sent_len_max_1": 188.3225, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8448, "doc_norm": 1.4041, "encoder_q-embeddings": 1389.2494, "encoder_q-layer.0": 925.8569, "encoder_q-layer.1": 959.4103, "encoder_q-layer.10": 1274.1672, "encoder_q-layer.11": 3121.1836, "encoder_q-layer.2": 1085.3832, "encoder_q-layer.3": 1131.0061, "encoder_q-layer.4": 1203.8676, "encoder_q-layer.5": 1129.4735, "encoder_q-layer.6": 1183.1672, "encoder_q-layer.7": 1239.4633, "encoder_q-layer.8": 1383.0353, "encoder_q-layer.9": 1193.5519, "epoch": 0.37, "inbatch_neg_score": 0.2501, "inbatch_pos_score": 0.8037, "learning_rate": 2.3666666666666668e-05, "loss": 3.8448, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.7438, "num_token_overlap": 11.7107, "num_token_query": 31.4396, "num_token_union": 65.1193, "num_word_context": 202.3155, "num_word_doc": 49.7932, "num_word_query": 23.3436, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2203.813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2505, "query_norm": 1.3054, "queue_k_norm": 1.4052, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4396, "sent_len_1": 66.7438, "sent_len_max_0": 127.4912, "sent_len_max_1": 190.345, "stdk": 0.0475, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8365, "doc_norm": 1.4049, "encoder_q-embeddings": 1522.668, "encoder_q-layer.0": 1010.6259, "encoder_q-layer.1": 1116.9174, "encoder_q-layer.10": 1292.7693, "encoder_q-layer.11": 3112.2371, "encoder_q-layer.2": 1221.0443, "encoder_q-layer.3": 1296.396, "encoder_q-layer.4": 1358.2235, "encoder_q-layer.5": 1372.8239, "encoder_q-layer.6": 1386.7865, "encoder_q-layer.7": 1195.7725, "encoder_q-layer.8": 1328.1188, "encoder_q-layer.9": 1145.2927, "epoch": 0.37, "inbatch_neg_score": 0.253, "inbatch_pos_score": 0.8047, "learning_rate": 2.361111111111111e-05, "loss": 3.8365, "norm_diff": 0.1035, "norm_loss": 0.0, "num_token_doc": 66.815, "num_token_overlap": 11.6714, "num_token_query": 31.3745, "num_token_union": 65.1855, "num_word_context": 202.2929, "num_word_doc": 49.8846, "num_word_query": 23.2973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2276.3026, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.3014, "queue_k_norm": 1.4053, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3745, "sent_len_1": 66.815, "sent_len_max_0": 127.4338, "sent_len_max_1": 189.62, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8269, "doc_norm": 1.4049, "encoder_q-embeddings": 1700.8615, "encoder_q-layer.0": 1134.731, "encoder_q-layer.1": 1336.9731, "encoder_q-layer.10": 1260.3439, "encoder_q-layer.11": 2962.6069, "encoder_q-layer.2": 1613.8488, "encoder_q-layer.3": 1834.8381, "encoder_q-layer.4": 1891.9368, "encoder_q-layer.5": 2016.4708, "encoder_q-layer.6": 1713.3606, "encoder_q-layer.7": 1533.9329, "encoder_q-layer.8": 1311.9843, "encoder_q-layer.9": 1202.9867, "epoch": 0.37, "inbatch_neg_score": 0.2525, "inbatch_pos_score": 0.7954, "learning_rate": 2.3555555555555556e-05, "loss": 3.8269, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.8852, "num_token_overlap": 11.6776, "num_token_query": 31.3743, "num_token_union": 65.2096, "num_word_context": 202.5112, "num_word_doc": 49.9013, "num_word_query": 23.2933, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2565.9038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.3023, "queue_k_norm": 1.407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3743, "sent_len_1": 66.8852, "sent_len_max_0": 127.5012, "sent_len_max_1": 187.0513, "stdk": 0.0475, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8093, "doc_norm": 1.4116, "encoder_q-embeddings": 1327.8112, "encoder_q-layer.0": 893.7668, "encoder_q-layer.1": 918.4659, "encoder_q-layer.10": 1301.5181, "encoder_q-layer.11": 3052.9392, "encoder_q-layer.2": 1062.853, "encoder_q-layer.3": 1118.9022, "encoder_q-layer.4": 1169.0947, "encoder_q-layer.5": 1167.4398, "encoder_q-layer.6": 1228.8687, "encoder_q-layer.7": 1308.9291, "encoder_q-layer.8": 1350.92, "encoder_q-layer.9": 1182.2286, "epoch": 0.38, "inbatch_neg_score": 0.2471, "inbatch_pos_score": 0.8252, "learning_rate": 2.35e-05, "loss": 3.8093, "norm_diff": 0.1028, "norm_loss": 0.0, "num_token_doc": 66.6782, "num_token_overlap": 11.704, "num_token_query": 31.3866, "num_token_union": 65.0508, "num_word_context": 202.1414, "num_word_doc": 49.7545, "num_word_query": 23.3114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2142.8997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2493, "query_norm": 1.3088, "queue_k_norm": 1.4069, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3866, "sent_len_1": 66.6782, "sent_len_max_0": 127.4712, "sent_len_max_1": 187.965, "stdk": 0.0478, "stdq": 0.0429, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8143, "doc_norm": 1.4156, "encoder_q-embeddings": 1089.1746, "encoder_q-layer.0": 713.5544, "encoder_q-layer.1": 739.1293, "encoder_q-layer.10": 1399.2587, "encoder_q-layer.11": 3191.7954, "encoder_q-layer.2": 852.9203, "encoder_q-layer.3": 864.8281, "encoder_q-layer.4": 942.1882, "encoder_q-layer.5": 971.6486, "encoder_q-layer.6": 1033.1992, "encoder_q-layer.7": 1160.1252, "encoder_q-layer.8": 1338.478, "encoder_q-layer.9": 1300.3879, "epoch": 0.38, "inbatch_neg_score": 0.247, "inbatch_pos_score": 0.8032, "learning_rate": 2.3444444444444448e-05, "loss": 3.8143, "norm_diff": 0.1103, "norm_loss": 0.0, "num_token_doc": 66.8011, "num_token_overlap": 11.6411, "num_token_query": 31.2325, "num_token_union": 65.0747, "num_word_context": 202.4796, "num_word_doc": 49.8997, "num_word_query": 23.2066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2054.2142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2472, "query_norm": 1.3053, "queue_k_norm": 1.404, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2325, "sent_len_1": 66.8011, "sent_len_max_0": 127.3988, "sent_len_max_1": 188.9913, "stdk": 0.0479, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8173, "doc_norm": 1.4099, "encoder_q-embeddings": 2259.4648, "encoder_q-layer.0": 1599.4889, "encoder_q-layer.1": 1767.214, "encoder_q-layer.10": 1251.3904, "encoder_q-layer.11": 2968.2737, "encoder_q-layer.2": 2031.9971, "encoder_q-layer.3": 2160.1648, "encoder_q-layer.4": 2401.8452, "encoder_q-layer.5": 2235.1057, "encoder_q-layer.6": 2006.7191, "encoder_q-layer.7": 1945.3706, "encoder_q-layer.8": 1512.9211, "encoder_q-layer.9": 1214.2632, "epoch": 0.38, "inbatch_neg_score": 0.2422, "inbatch_pos_score": 0.8047, "learning_rate": 2.338888888888889e-05, "loss": 3.8173, "norm_diff": 0.1315, "norm_loss": 0.0, "num_token_doc": 66.7324, "num_token_overlap": 11.6589, "num_token_query": 31.3713, "num_token_union": 65.0974, "num_word_context": 202.2522, "num_word_doc": 49.7606, "num_word_query": 23.2943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2991.7262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2412, "query_norm": 1.2784, "queue_k_norm": 1.4031, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3713, "sent_len_1": 66.7324, "sent_len_max_0": 127.46, "sent_len_max_1": 191.0263, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8126, "doc_norm": 1.4052, "encoder_q-embeddings": 1507.2942, "encoder_q-layer.0": 1060.4413, "encoder_q-layer.1": 1127.7052, "encoder_q-layer.10": 1237.6647, "encoder_q-layer.11": 3050.2949, "encoder_q-layer.2": 1290.8651, "encoder_q-layer.3": 1289.6094, "encoder_q-layer.4": 1345.2959, "encoder_q-layer.5": 1356.0789, "encoder_q-layer.6": 1364.4991, "encoder_q-layer.7": 1398.0204, "encoder_q-layer.8": 1673.7098, "encoder_q-layer.9": 1170.3721, "epoch": 0.38, "inbatch_neg_score": 0.2405, "inbatch_pos_score": 0.7925, "learning_rate": 2.3333333333333336e-05, "loss": 3.8126, "norm_diff": 0.1185, "norm_loss": 0.0, "num_token_doc": 66.5569, "num_token_overlap": 11.708, "num_token_query": 31.5203, "num_token_union": 65.0345, "num_word_context": 202.0222, "num_word_doc": 49.6774, "num_word_query": 23.4004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2350.2979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2416, "query_norm": 1.2867, "queue_k_norm": 1.4061, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5203, "sent_len_1": 66.5569, "sent_len_max_0": 127.5987, "sent_len_max_1": 188.9988, "stdk": 0.0475, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.8332, "doc_norm": 1.4002, "encoder_q-embeddings": 1434.7327, "encoder_q-layer.0": 1002.2155, "encoder_q-layer.1": 1093.6857, "encoder_q-layer.10": 1200.4333, "encoder_q-layer.11": 2988.4006, "encoder_q-layer.2": 1235.1278, "encoder_q-layer.3": 1313.5222, "encoder_q-layer.4": 1344.436, "encoder_q-layer.5": 1399.77, "encoder_q-layer.6": 1329.8549, "encoder_q-layer.7": 1258.9109, "encoder_q-layer.8": 1366.2036, "encoder_q-layer.9": 1153.4412, "epoch": 0.38, "inbatch_neg_score": 0.244, "inbatch_pos_score": 0.7871, "learning_rate": 2.3277777777777778e-05, "loss": 3.8332, "norm_diff": 0.1173, "norm_loss": 0.0, "num_token_doc": 66.6566, "num_token_overlap": 11.6533, "num_token_query": 31.4245, "num_token_union": 65.1218, "num_word_context": 202.291, "num_word_doc": 49.7691, "num_word_query": 23.3475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2243.753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2433, "query_norm": 1.2829, "queue_k_norm": 1.406, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4245, "sent_len_1": 66.6566, "sent_len_max_0": 127.4225, "sent_len_max_1": 186.6125, "stdk": 0.0473, "stdq": 0.0419, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.84, "doc_norm": 1.4092, "encoder_q-embeddings": 1589.3259, "encoder_q-layer.0": 1076.6663, "encoder_q-layer.1": 1182.6417, "encoder_q-layer.10": 1282.6403, "encoder_q-layer.11": 2989.2217, "encoder_q-layer.2": 1374.4143, "encoder_q-layer.3": 1477.2111, "encoder_q-layer.4": 1592.1453, "encoder_q-layer.5": 1618.0737, "encoder_q-layer.6": 1800.953, "encoder_q-layer.7": 1558.2789, "encoder_q-layer.8": 1482.9454, "encoder_q-layer.9": 1218.1544, "epoch": 0.38, "inbatch_neg_score": 0.24, "inbatch_pos_score": 0.7944, "learning_rate": 2.3222222222222224e-05, "loss": 3.84, "norm_diff": 0.1196, "norm_loss": 0.0, "num_token_doc": 66.8024, "num_token_overlap": 11.6381, "num_token_query": 31.2456, "num_token_union": 65.0613, "num_word_context": 201.8949, "num_word_doc": 49.7926, "num_word_query": 23.1987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2457.2775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2391, "query_norm": 1.2897, "queue_k_norm": 1.4048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2456, "sent_len_1": 66.8024, "sent_len_max_0": 127.5375, "sent_len_max_1": 188.9325, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.8248, "doc_norm": 1.4063, "encoder_q-embeddings": 1142.8506, "encoder_q-layer.0": 750.1462, "encoder_q-layer.1": 798.4301, "encoder_q-layer.10": 1265.4147, "encoder_q-layer.11": 2983.2354, "encoder_q-layer.2": 949.4929, "encoder_q-layer.3": 1008.9252, "encoder_q-layer.4": 1054.3916, "encoder_q-layer.5": 1045.2715, "encoder_q-layer.6": 1103.5006, "encoder_q-layer.7": 1151.1893, "encoder_q-layer.8": 1245.9598, "encoder_q-layer.9": 1180.1497, "epoch": 0.38, "inbatch_neg_score": 0.2402, "inbatch_pos_score": 0.8198, "learning_rate": 2.3166666666666666e-05, "loss": 3.8248, "norm_diff": 0.102, "norm_loss": 0.0, "num_token_doc": 66.9118, "num_token_overlap": 11.7058, "num_token_query": 31.4393, "num_token_union": 65.1815, "num_word_context": 202.2223, "num_word_doc": 49.9054, "num_word_query": 23.3699, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2012.6745, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2407, "query_norm": 1.3043, "queue_k_norm": 1.4073, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4393, "sent_len_1": 66.9118, "sent_len_max_0": 127.57, "sent_len_max_1": 189.5637, "stdk": 0.0476, "stdq": 0.0428, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8397, "doc_norm": 1.4115, "encoder_q-embeddings": 1186.4404, "encoder_q-layer.0": 832.6976, "encoder_q-layer.1": 879.5151, "encoder_q-layer.10": 1300.7159, "encoder_q-layer.11": 3046.8066, "encoder_q-layer.2": 1024.8025, "encoder_q-layer.3": 1054.7687, "encoder_q-layer.4": 1147.0646, "encoder_q-layer.5": 1141.5531, "encoder_q-layer.6": 1241.9238, "encoder_q-layer.7": 1234.2849, "encoder_q-layer.8": 1297.4943, "encoder_q-layer.9": 1154.5131, "epoch": 0.38, "inbatch_neg_score": 0.2422, "inbatch_pos_score": 0.7964, "learning_rate": 2.3111111111111112e-05, "loss": 3.8397, "norm_diff": 0.1078, "norm_loss": 0.0, "num_token_doc": 66.7307, "num_token_overlap": 11.6221, "num_token_query": 31.2506, "num_token_union": 65.0608, "num_word_context": 202.219, "num_word_doc": 49.7504, "num_word_query": 23.1908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2102.1964, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2423, "query_norm": 1.3038, "queue_k_norm": 1.4046, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2506, "sent_len_1": 66.7307, "sent_len_max_0": 127.4788, "sent_len_max_1": 190.5813, "stdk": 0.0478, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8251, "doc_norm": 1.4002, "encoder_q-embeddings": 1181.1747, "encoder_q-layer.0": 786.3376, "encoder_q-layer.1": 846.2241, "encoder_q-layer.10": 1253.6553, "encoder_q-layer.11": 3093.6133, "encoder_q-layer.2": 918.6892, "encoder_q-layer.3": 963.519, "encoder_q-layer.4": 1036.7003, "encoder_q-layer.5": 1143.1996, "encoder_q-layer.6": 1182.3398, "encoder_q-layer.7": 1286.4463, "encoder_q-layer.8": 1329.87, "encoder_q-layer.9": 1190.036, "epoch": 0.38, "inbatch_neg_score": 0.2381, "inbatch_pos_score": 0.7827, "learning_rate": 2.3055555555555558e-05, "loss": 3.8251, "norm_diff": 0.1106, "norm_loss": 0.0, "num_token_doc": 66.9373, "num_token_overlap": 11.6482, "num_token_query": 31.2868, "num_token_union": 65.1967, "num_word_context": 202.0802, "num_word_doc": 49.9351, "num_word_query": 23.2443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2059.5136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2395, "query_norm": 1.2896, "queue_k_norm": 1.4084, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2868, "sent_len_1": 66.9373, "sent_len_max_0": 127.3375, "sent_len_max_1": 189.8587, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8272, "doc_norm": 1.3975, "encoder_q-embeddings": 1059.353, "encoder_q-layer.0": 727.337, "encoder_q-layer.1": 745.6456, "encoder_q-layer.10": 1209.5748, "encoder_q-layer.11": 3033.3896, "encoder_q-layer.2": 823.5834, "encoder_q-layer.3": 814.45, "encoder_q-layer.4": 850.7015, "encoder_q-layer.5": 866.1165, "encoder_q-layer.6": 947.7499, "encoder_q-layer.7": 1041.5752, "encoder_q-layer.8": 1276.5874, "encoder_q-layer.9": 1156.2192, "epoch": 0.38, "inbatch_neg_score": 0.2398, "inbatch_pos_score": 0.8135, "learning_rate": 2.3000000000000003e-05, "loss": 3.8272, "norm_diff": 0.0991, "norm_loss": 0.0, "num_token_doc": 66.4907, "num_token_overlap": 11.6827, "num_token_query": 31.2561, "num_token_union": 64.8674, "num_word_context": 202.1568, "num_word_doc": 49.6131, "num_word_query": 23.1897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.2983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2415, "query_norm": 1.2984, "queue_k_norm": 1.4039, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2561, "sent_len_1": 66.4907, "sent_len_max_0": 127.4125, "sent_len_max_1": 189.415, "stdk": 0.0473, "stdq": 0.0425, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8374, "doc_norm": 1.4006, "encoder_q-embeddings": 1550.0132, "encoder_q-layer.0": 1009.6519, "encoder_q-layer.1": 1129.4824, "encoder_q-layer.10": 1199.7433, "encoder_q-layer.11": 2871.1436, "encoder_q-layer.2": 1285.7014, "encoder_q-layer.3": 1325.2725, "encoder_q-layer.4": 1452.932, "encoder_q-layer.5": 1236.5403, "encoder_q-layer.6": 1224.3047, "encoder_q-layer.7": 1242.0492, "encoder_q-layer.8": 1287.0529, "encoder_q-layer.9": 1166.8717, "epoch": 0.38, "inbatch_neg_score": 0.2402, "inbatch_pos_score": 0.8003, "learning_rate": 2.2944444444444446e-05, "loss": 3.8374, "norm_diff": 0.0985, "norm_loss": 0.0, "num_token_doc": 66.6514, "num_token_overlap": 11.6686, "num_token_query": 31.493, "num_token_union": 65.113, "num_word_context": 202.6492, "num_word_doc": 49.7652, "num_word_query": 23.3958, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2194.9324, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.24, "query_norm": 1.3021, "queue_k_norm": 1.4048, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.493, "sent_len_1": 66.6514, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.3175, "stdk": 0.0474, "stdq": 0.0426, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8584, "doc_norm": 1.4125, "encoder_q-embeddings": 3959.9531, "encoder_q-layer.0": 2816.8831, "encoder_q-layer.1": 3117.5615, "encoder_q-layer.10": 1330.8912, "encoder_q-layer.11": 3236.3547, "encoder_q-layer.2": 3759.4094, "encoder_q-layer.3": 3576.0034, "encoder_q-layer.4": 3983.8555, "encoder_q-layer.5": 3910.012, "encoder_q-layer.6": 3671.4092, "encoder_q-layer.7": 2753.947, "encoder_q-layer.8": 2103.4336, "encoder_q-layer.9": 1375.6104, "epoch": 0.38, "inbatch_neg_score": 0.2427, "inbatch_pos_score": 0.7959, "learning_rate": 2.288888888888889e-05, "loss": 3.8584, "norm_diff": 0.1221, "norm_loss": 0.0, "num_token_doc": 66.6, "num_token_overlap": 11.6556, "num_token_query": 31.3118, "num_token_union": 64.9908, "num_word_context": 201.9854, "num_word_doc": 49.6793, "num_word_query": 23.2339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4871.3177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2433, "query_norm": 1.2904, "queue_k_norm": 1.4024, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3118, "sent_len_1": 66.6, "sent_len_max_0": 127.475, "sent_len_max_1": 188.7287, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.8176, "doc_norm": 1.4122, "encoder_q-embeddings": 1314.8278, "encoder_q-layer.0": 897.4314, "encoder_q-layer.1": 931.5994, "encoder_q-layer.10": 1231.5038, "encoder_q-layer.11": 3153.2712, "encoder_q-layer.2": 1085.0919, "encoder_q-layer.3": 1143.9595, "encoder_q-layer.4": 1281.2869, "encoder_q-layer.5": 1271.8188, "encoder_q-layer.6": 1312.7141, "encoder_q-layer.7": 1282.8898, "encoder_q-layer.8": 1327.0565, "encoder_q-layer.9": 1199.0591, "epoch": 0.38, "inbatch_neg_score": 0.2473, "inbatch_pos_score": 0.8008, "learning_rate": 2.2833333333333334e-05, "loss": 3.8176, "norm_diff": 0.1096, "norm_loss": 0.0, "num_token_doc": 66.702, "num_token_overlap": 11.6407, "num_token_query": 31.3097, "num_token_union": 65.0815, "num_word_context": 202.3971, "num_word_doc": 49.7641, "num_word_query": 23.2552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2237.8428, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2471, "query_norm": 1.3026, "queue_k_norm": 1.4056, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3097, "sent_len_1": 66.702, "sent_len_max_0": 127.5088, "sent_len_max_1": 189.8363, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.8103, "doc_norm": 1.4085, "encoder_q-embeddings": 1294.4183, "encoder_q-layer.0": 874.3439, "encoder_q-layer.1": 918.5468, "encoder_q-layer.10": 1225.3196, "encoder_q-layer.11": 3024.7451, "encoder_q-layer.2": 1045.6462, "encoder_q-layer.3": 1082.1779, "encoder_q-layer.4": 1148.9943, "encoder_q-layer.5": 1156.6042, "encoder_q-layer.6": 1148.47, "encoder_q-layer.7": 1225.74, "encoder_q-layer.8": 1404.7423, "encoder_q-layer.9": 1201.4471, "epoch": 0.38, "inbatch_neg_score": 0.2432, "inbatch_pos_score": 0.793, "learning_rate": 2.277777777777778e-05, "loss": 3.8103, "norm_diff": 0.1153, "norm_loss": 0.0, "num_token_doc": 66.7019, "num_token_overlap": 11.7124, "num_token_query": 31.4324, "num_token_union": 65.0967, "num_word_context": 201.9089, "num_word_doc": 49.7606, "num_word_query": 23.3635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2135.5137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2438, "query_norm": 1.2932, "queue_k_norm": 1.4041, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4324, "sent_len_1": 66.7019, "sent_len_max_0": 127.4112, "sent_len_max_1": 189.645, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8372, "doc_norm": 1.4044, "encoder_q-embeddings": 1513.406, "encoder_q-layer.0": 1023.0184, "encoder_q-layer.1": 1211.796, "encoder_q-layer.10": 1249.9086, "encoder_q-layer.11": 2958.8384, "encoder_q-layer.2": 1387.9637, "encoder_q-layer.3": 1479.8619, "encoder_q-layer.4": 1418.7953, "encoder_q-layer.5": 1329.4719, "encoder_q-layer.6": 1260.734, "encoder_q-layer.7": 1340.7133, "encoder_q-layer.8": 1346.437, "encoder_q-layer.9": 1166.4441, "epoch": 0.38, "inbatch_neg_score": 0.2438, "inbatch_pos_score": 0.8032, "learning_rate": 2.2722222222222222e-05, "loss": 3.8372, "norm_diff": 0.0928, "norm_loss": 0.0, "num_token_doc": 66.6334, "num_token_overlap": 11.6409, "num_token_query": 31.2161, "num_token_union": 64.9345, "num_word_context": 202.2291, "num_word_doc": 49.7172, "num_word_query": 23.1749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2306.4707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2455, "query_norm": 1.3117, "queue_k_norm": 1.4028, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2161, "sent_len_1": 66.6334, "sent_len_max_0": 127.5525, "sent_len_max_1": 190.8413, "stdk": 0.0475, "stdq": 0.0428, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.823, "doc_norm": 1.4054, "encoder_q-embeddings": 2871.0186, "encoder_q-layer.0": 1929.9524, "encoder_q-layer.1": 2122.4114, "encoder_q-layer.10": 2551.3374, "encoder_q-layer.11": 6151.6753, "encoder_q-layer.2": 2411.2185, "encoder_q-layer.3": 2508.3479, "encoder_q-layer.4": 2654.6274, "encoder_q-layer.5": 2612.6013, "encoder_q-layer.6": 2881.3374, "encoder_q-layer.7": 2895.1848, "encoder_q-layer.8": 2753.0842, "encoder_q-layer.9": 2398.3909, "epoch": 0.39, "inbatch_neg_score": 0.244, "inbatch_pos_score": 0.8086, "learning_rate": 2.2666666666666668e-05, "loss": 3.823, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 66.5385, "num_token_overlap": 11.6865, "num_token_query": 31.4708, "num_token_union": 65.051, "num_word_context": 202.1896, "num_word_doc": 49.6711, "num_word_query": 23.3701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4509.9776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2445, "query_norm": 1.3086, "queue_k_norm": 1.4046, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4708, "sent_len_1": 66.5385, "sent_len_max_0": 127.3925, "sent_len_max_1": 188.3525, "stdk": 0.0476, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.8156, "doc_norm": 1.4069, "encoder_q-embeddings": 2149.6897, "encoder_q-layer.0": 1368.3289, "encoder_q-layer.1": 1435.5417, "encoder_q-layer.10": 2511.6997, "encoder_q-layer.11": 6200.4668, "encoder_q-layer.2": 1558.5292, "encoder_q-layer.3": 1583.3462, "encoder_q-layer.4": 1684.3492, "encoder_q-layer.5": 1749.8704, "encoder_q-layer.6": 1916.7404, "encoder_q-layer.7": 2109.4844, "encoder_q-layer.8": 2479.2214, "encoder_q-layer.9": 2386.7817, "epoch": 0.39, "inbatch_neg_score": 0.2482, "inbatch_pos_score": 0.8262, "learning_rate": 2.2611111111111113e-05, "loss": 3.8156, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.7725, "num_token_overlap": 11.6874, "num_token_query": 31.4053, "num_token_union": 65.1591, "num_word_context": 202.3328, "num_word_doc": 49.8113, "num_word_query": 23.3188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3900.8784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2472, "query_norm": 1.3057, "queue_k_norm": 1.4061, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4053, "sent_len_1": 66.7725, "sent_len_max_0": 127.505, "sent_len_max_1": 189.4025, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8098, "doc_norm": 1.4076, "encoder_q-embeddings": 2448.77, "encoder_q-layer.0": 1545.9984, "encoder_q-layer.1": 1767.6311, "encoder_q-layer.10": 2480.9719, "encoder_q-layer.11": 5974.8037, "encoder_q-layer.2": 2011.4216, "encoder_q-layer.3": 2165.8069, "encoder_q-layer.4": 2197.2668, "encoder_q-layer.5": 2236.1135, "encoder_q-layer.6": 2493.5869, "encoder_q-layer.7": 2532.0347, "encoder_q-layer.8": 2628.3364, "encoder_q-layer.9": 2463.0498, "epoch": 0.39, "inbatch_neg_score": 0.2492, "inbatch_pos_score": 0.8013, "learning_rate": 2.255555555555556e-05, "loss": 3.8098, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.5506, "num_token_overlap": 11.7234, "num_token_query": 31.5358, "num_token_union": 65.0465, "num_word_context": 202.0247, "num_word_doc": 49.6711, "num_word_query": 23.4319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4112.8538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2495, "query_norm": 1.2913, "queue_k_norm": 1.4044, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5358, "sent_len_1": 66.5506, "sent_len_max_0": 127.595, "sent_len_max_1": 188.7637, "stdk": 0.0476, "stdq": 0.0416, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.8304, "doc_norm": 1.4044, "encoder_q-embeddings": 3318.5762, "encoder_q-layer.0": 2264.1072, "encoder_q-layer.1": 2495.9612, "encoder_q-layer.10": 2495.6443, "encoder_q-layer.11": 5972.8315, "encoder_q-layer.2": 3083.9375, "encoder_q-layer.3": 3312.908, "encoder_q-layer.4": 3308.4839, "encoder_q-layer.5": 3036.4905, "encoder_q-layer.6": 3106.2874, "encoder_q-layer.7": 2900.4126, "encoder_q-layer.8": 2774.4905, "encoder_q-layer.9": 2322.1572, "epoch": 0.39, "inbatch_neg_score": 0.2536, "inbatch_pos_score": 0.793, "learning_rate": 2.25e-05, "loss": 3.8304, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.6777, "num_token_overlap": 11.6643, "num_token_query": 31.3909, "num_token_union": 65.0396, "num_word_context": 202.3291, "num_word_doc": 49.7308, "num_word_query": 23.3258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4937.9513, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2532, "query_norm": 1.3104, "queue_k_norm": 1.4069, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3909, "sent_len_1": 66.6777, "sent_len_max_0": 127.5, "sent_len_max_1": 190.5987, "stdk": 0.0475, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.8108, "doc_norm": 1.4019, "encoder_q-embeddings": 2424.668, "encoder_q-layer.0": 1557.2858, "encoder_q-layer.1": 1660.5779, "encoder_q-layer.10": 2725.2485, "encoder_q-layer.11": 6355.7939, "encoder_q-layer.2": 1854.954, "encoder_q-layer.3": 1906.1499, "encoder_q-layer.4": 2013.7808, "encoder_q-layer.5": 2050.0186, "encoder_q-layer.6": 2247.1887, "encoder_q-layer.7": 2306.0515, "encoder_q-layer.8": 2668.4463, "encoder_q-layer.9": 2476.8416, "epoch": 0.39, "inbatch_neg_score": 0.2545, "inbatch_pos_score": 0.8013, "learning_rate": 2.2444444444444447e-05, "loss": 3.8108, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.7118, "num_token_overlap": 11.6507, "num_token_query": 31.3418, "num_token_union": 65.0714, "num_word_context": 202.0585, "num_word_doc": 49.7833, "num_word_query": 23.271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4262.2053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.2907, "queue_k_norm": 1.407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3418, "sent_len_1": 66.7118, "sent_len_max_0": 127.595, "sent_len_max_1": 189.3875, "stdk": 0.0474, "stdq": 0.0417, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8049, "doc_norm": 1.4059, "encoder_q-embeddings": 2233.8457, "encoder_q-layer.0": 1477.7681, "encoder_q-layer.1": 1569.9967, "encoder_q-layer.10": 2668.1785, "encoder_q-layer.11": 6390.0225, "encoder_q-layer.2": 1781.3645, "encoder_q-layer.3": 1825.5302, "encoder_q-layer.4": 1852.6938, "encoder_q-layer.5": 1790.4487, "encoder_q-layer.6": 2059.5247, "encoder_q-layer.7": 2248.7415, "encoder_q-layer.8": 2682.6204, "encoder_q-layer.9": 2510.272, "epoch": 0.39, "inbatch_neg_score": 0.251, "inbatch_pos_score": 0.792, "learning_rate": 2.238888888888889e-05, "loss": 3.8049, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.8099, "num_token_overlap": 11.7145, "num_token_query": 31.4544, "num_token_union": 65.1384, "num_word_context": 202.2984, "num_word_doc": 49.8534, "num_word_query": 23.37, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4125.1211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2515, "query_norm": 1.2929, "queue_k_norm": 1.4097, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4544, "sent_len_1": 66.8099, "sent_len_max_0": 127.4525, "sent_len_max_1": 188.6037, "stdk": 0.0475, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.8133, "doc_norm": 1.4024, "encoder_q-embeddings": 3646.4797, "encoder_q-layer.0": 2751.0688, "encoder_q-layer.1": 2994.9431, "encoder_q-layer.10": 2916.1868, "encoder_q-layer.11": 5907.939, "encoder_q-layer.2": 3644.407, "encoder_q-layer.3": 3608.7532, "encoder_q-layer.4": 3188.0603, "encoder_q-layer.5": 3022.6726, "encoder_q-layer.6": 2936.2261, "encoder_q-layer.7": 3094.6611, "encoder_q-layer.8": 2762.9746, "encoder_q-layer.9": 2457.6443, "epoch": 0.39, "inbatch_neg_score": 0.2583, "inbatch_pos_score": 0.8184, "learning_rate": 2.2333333333333335e-05, "loss": 3.8133, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.7898, "num_token_overlap": 11.707, "num_token_query": 31.3863, "num_token_union": 65.1173, "num_word_context": 202.4143, "num_word_doc": 49.8854, "num_word_query": 23.3396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5167.9698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.2948, "queue_k_norm": 1.4064, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3863, "sent_len_1": 66.7898, "sent_len_max_0": 127.5187, "sent_len_max_1": 187.2675, "stdk": 0.0474, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.8433, "doc_norm": 1.4121, "encoder_q-embeddings": 2600.6108, "encoder_q-layer.0": 1810.5748, "encoder_q-layer.1": 1844.1326, "encoder_q-layer.10": 2719.5293, "encoder_q-layer.11": 6207.8594, "encoder_q-layer.2": 2149.2878, "encoder_q-layer.3": 2279.8044, "encoder_q-layer.4": 2328.4819, "encoder_q-layer.5": 2376.5305, "encoder_q-layer.6": 2489.1648, "encoder_q-layer.7": 2400.7615, "encoder_q-layer.8": 2641.4746, "encoder_q-layer.9": 2363.4429, "epoch": 0.39, "inbatch_neg_score": 0.2575, "inbatch_pos_score": 0.8076, "learning_rate": 2.2277777777777778e-05, "loss": 3.8433, "norm_diff": 0.0962, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 11.6212, "num_token_query": 31.3294, "num_token_union": 65.1475, "num_word_context": 202.251, "num_word_doc": 49.8505, "num_word_query": 23.2644, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4336.9682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2576, "query_norm": 1.3159, "queue_k_norm": 1.4091, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3294, "sent_len_1": 66.8156, "sent_len_max_0": 127.5362, "sent_len_max_1": 191.0513, "stdk": 0.0478, "stdq": 0.0429, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.8351, "doc_norm": 1.415, "encoder_q-embeddings": 2242.5754, "encoder_q-layer.0": 1528.5111, "encoder_q-layer.1": 1628.1251, "encoder_q-layer.10": 2478.6116, "encoder_q-layer.11": 6358.7832, "encoder_q-layer.2": 1859.5416, "encoder_q-layer.3": 1813.3339, "encoder_q-layer.4": 1906.0105, "encoder_q-layer.5": 1837.1913, "encoder_q-layer.6": 1887.7372, "encoder_q-layer.7": 2065.0889, "encoder_q-layer.8": 2363.5979, "encoder_q-layer.9": 2267.9021, "epoch": 0.39, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.8267, "learning_rate": 2.2222222222222223e-05, "loss": 3.8351, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.7648, "num_token_overlap": 11.6713, "num_token_query": 31.3674, "num_token_union": 65.0639, "num_word_context": 202.405, "num_word_doc": 49.813, "num_word_query": 23.2735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4077.7111, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2568, "query_norm": 1.3054, "queue_k_norm": 1.4085, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3674, "sent_len_1": 66.7648, "sent_len_max_0": 127.4038, "sent_len_max_1": 190.2525, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 30.6585, "dev_samples_per_second": 2.088, "dev_steps_per_second": 0.033, "epoch": 0.39, "step": 60000, "test_accuracy": 92.431640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.43146035075187683, "test_doc_norm": 1.369802474975586, "test_inbatch_neg_score": 0.5731284618377686, "test_inbatch_pos_score": 1.4484477043151855, "test_loss": 0.43146035075187683, "test_loss_align": 0.9803458452224731, "test_loss_unif": 3.8675642013549805, "test_loss_unif_q@queue": 3.8675646781921387, "test_norm_diff": 0.03357753902673721, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.24199819564819336, "test_query_norm": 1.4033799171447754, "test_queue_k_norm": 1.4088401794433594, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04111966863274574, "test_stdq": 0.04125973582267761, "test_stdqueue_k": 0.04771662503480911, "test_stdqueue_q": 0.0 }, { "dev_runtime": 30.6585, "dev_samples_per_second": 2.088, "dev_steps_per_second": 0.033, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.37018, "eval_beir-arguana_recall@10": 0.62945, "eval_beir-arguana_recall@100": 0.92888, "eval_beir-arguana_recall@20": 0.76885, "eval_beir-avg_ndcg@10": 0.3702905833333333, "eval_beir-avg_recall@10": 0.44139141666666665, "eval_beir-avg_recall@100": 0.6285786666666666, "eval_beir-avg_recall@20": 0.5037595833333333, "eval_beir-cqadupstack_ndcg@10": 0.2553458333333333, "eval_beir-cqadupstack_recall@10": 0.34888416666666666, "eval_beir-cqadupstack_recall@100": 0.5836766666666667, "eval_beir-cqadupstack_recall@20": 0.4160258333333333, "eval_beir-fiqa_ndcg@10": 0.23323, "eval_beir-fiqa_recall@10": 0.29106, "eval_beir-fiqa_recall@100": 0.56759, "eval_beir-fiqa_recall@20": 0.37733, "eval_beir-nfcorpus_ndcg@10": 0.29744, "eval_beir-nfcorpus_recall@10": 0.14229, "eval_beir-nfcorpus_recall@100": 0.27596, "eval_beir-nfcorpus_recall@20": 0.17396, "eval_beir-nq_ndcg@10": 0.27701, "eval_beir-nq_recall@10": 0.44759, "eval_beir-nq_recall@100": 0.79623, "eval_beir-nq_recall@20": 0.57124, "eval_beir-quora_ndcg@10": 0.76332, "eval_beir-quora_recall@10": 0.87348, "eval_beir-quora_recall@100": 0.97528, "eval_beir-quora_recall@20": 0.91974, "eval_beir-scidocs_ndcg@10": 0.14847, "eval_beir-scidocs_recall@10": 0.15583, "eval_beir-scidocs_recall@100": 0.3573, "eval_beir-scidocs_recall@20": 0.21233, "eval_beir-scifact_ndcg@10": 0.61602, "eval_beir-scifact_recall@10": 0.78956, "eval_beir-scifact_recall@100": 0.91378, "eval_beir-scifact_recall@20": 0.82578, "eval_beir-trec-covid_ndcg@10": 0.56604, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.4536, "eval_beir-trec-covid_recall@20": 0.573, "eval_beir-webis-touche2020_ndcg@10": 0.17585, "eval_beir-webis-touche2020_recall@10": 0.12377, "eval_beir-webis-touche2020_recall@100": 0.43349, "eval_beir-webis-touche2020_recall@20": 0.19934, "eval_senteval-avg_sts": 0.7645671903561044, "eval_senteval-sickr_spearman": 0.7295327281858437, "eval_senteval-stsb_spearman": 0.7996016525263651, "step": 60000, "test_accuracy": 92.431640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.43146035075187683, "test_doc_norm": 1.369802474975586, "test_inbatch_neg_score": 0.5731284618377686, "test_inbatch_pos_score": 1.4484477043151855, "test_loss": 0.43146035075187683, "test_loss_align": 0.9803458452224731, "test_loss_unif": 3.8675642013549805, "test_loss_unif_q@queue": 3.8675646781921387, "test_norm_diff": 0.03357753902673721, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.24199819564819336, "test_query_norm": 1.4033799171447754, "test_queue_k_norm": 1.4088401794433594, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04111966863274574, "test_stdq": 0.04125973582267761, "test_stdqueue_k": 0.04771662503480911, "test_stdqueue_q": 0.0 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8108, "doc_norm": 1.4044, "encoder_q-embeddings": 8581.7031, "encoder_q-layer.0": 6758.7935, "encoder_q-layer.1": 8002.5356, "encoder_q-layer.10": 2313.1953, "encoder_q-layer.11": 5826.5605, "encoder_q-layer.2": 10341.4277, "encoder_q-layer.3": 10740.4199, "encoder_q-layer.4": 11416.8418, "encoder_q-layer.5": 12662.4326, "encoder_q-layer.6": 14724.8018, "encoder_q-layer.7": 13277.292, "encoder_q-layer.8": 7566.1172, "encoder_q-layer.9": 2986.2651, "epoch": 0.39, "inbatch_neg_score": 0.2609, "inbatch_pos_score": 0.832, "learning_rate": 2.216666666666667e-05, "loss": 3.8108, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.6491, "num_token_overlap": 11.6978, "num_token_query": 31.3581, "num_token_union": 64.9849, "num_word_context": 202.1175, "num_word_doc": 49.7033, "num_word_query": 23.2978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14230.7023, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2612, "query_norm": 1.3057, "queue_k_norm": 1.408, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3581, "sent_len_1": 66.6491, "sent_len_max_0": 127.2913, "sent_len_max_1": 188.9938, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.8219, "doc_norm": 1.4067, "encoder_q-embeddings": 2376.2283, "encoder_q-layer.0": 1634.0067, "encoder_q-layer.1": 1715.1547, "encoder_q-layer.10": 2484.5027, "encoder_q-layer.11": 6095.4907, "encoder_q-layer.2": 2108.5371, "encoder_q-layer.3": 2137.7705, "encoder_q-layer.4": 2407.3855, "encoder_q-layer.5": 2174.854, "encoder_q-layer.6": 2266.6167, "encoder_q-layer.7": 2520.5869, "encoder_q-layer.8": 2708.2402, "encoder_q-layer.9": 2330.5469, "epoch": 0.39, "inbatch_neg_score": 0.2585, "inbatch_pos_score": 0.7925, "learning_rate": 2.211111111111111e-05, "loss": 3.8219, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.7523, "num_token_overlap": 11.6417, "num_token_query": 31.2131, "num_token_union": 65.0277, "num_word_context": 202.2591, "num_word_doc": 49.7981, "num_word_query": 23.187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4166.2025, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2581, "query_norm": 1.2932, "queue_k_norm": 1.409, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2131, "sent_len_1": 66.7523, "sent_len_max_0": 127.4513, "sent_len_max_1": 189.5225, "stdk": 0.0475, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8225, "doc_norm": 1.4133, "encoder_q-embeddings": 3547.2034, "encoder_q-layer.0": 2405.3333, "encoder_q-layer.1": 2649.3582, "encoder_q-layer.10": 2423.0852, "encoder_q-layer.11": 6196.9673, "encoder_q-layer.2": 3200.2751, "encoder_q-layer.3": 3355.3032, "encoder_q-layer.4": 3460.3882, "encoder_q-layer.5": 3100.3367, "encoder_q-layer.6": 2676.2495, "encoder_q-layer.7": 2536.4524, "encoder_q-layer.8": 2476.5332, "encoder_q-layer.9": 2274.0676, "epoch": 0.39, "inbatch_neg_score": 0.2594, "inbatch_pos_score": 0.8208, "learning_rate": 2.2055555555555557e-05, "loss": 3.8225, "norm_diff": 0.1232, "norm_loss": 0.0, "num_token_doc": 66.8076, "num_token_overlap": 11.641, "num_token_query": 31.3533, "num_token_union": 65.1429, "num_word_context": 202.588, "num_word_doc": 49.8608, "num_word_query": 23.2941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5031.922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2593, "query_norm": 1.29, "queue_k_norm": 1.4076, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3533, "sent_len_1": 66.8076, "sent_len_max_0": 127.3275, "sent_len_max_1": 188.4, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.827, "doc_norm": 1.4153, "encoder_q-embeddings": 2677.6907, "encoder_q-layer.0": 1904.5531, "encoder_q-layer.1": 2111.2129, "encoder_q-layer.10": 2736.3418, "encoder_q-layer.11": 6040.1699, "encoder_q-layer.2": 2498.7351, "encoder_q-layer.3": 2510.593, "encoder_q-layer.4": 2590.8157, "encoder_q-layer.5": 2481.4607, "encoder_q-layer.6": 2478.5852, "encoder_q-layer.7": 2607.7522, "encoder_q-layer.8": 2911.9578, "encoder_q-layer.9": 2432.4194, "epoch": 0.39, "inbatch_neg_score": 0.2555, "inbatch_pos_score": 0.8188, "learning_rate": 2.2000000000000003e-05, "loss": 3.827, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 66.5756, "num_token_overlap": 11.6305, "num_token_query": 31.2794, "num_token_union": 64.9711, "num_word_context": 202.0708, "num_word_doc": 49.7058, "num_word_query": 23.2207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4447.4155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2546, "query_norm": 1.2953, "queue_k_norm": 1.4106, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2794, "sent_len_1": 66.5756, "sent_len_max_0": 127.4625, "sent_len_max_1": 189.6438, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8344, "doc_norm": 1.4165, "encoder_q-embeddings": 2193.167, "encoder_q-layer.0": 1481.1937, "encoder_q-layer.1": 1531.8489, "encoder_q-layer.10": 2534.0898, "encoder_q-layer.11": 6685.8691, "encoder_q-layer.2": 1709.1069, "encoder_q-layer.3": 1713.885, "encoder_q-layer.4": 1754.5004, "encoder_q-layer.5": 1728.8003, "encoder_q-layer.6": 1973.3105, "encoder_q-layer.7": 2144.1096, "encoder_q-layer.8": 2606.0845, "encoder_q-layer.9": 2437.4119, "epoch": 0.39, "inbatch_neg_score": 0.2589, "inbatch_pos_score": 0.8208, "learning_rate": 2.1944444444444445e-05, "loss": 3.8344, "norm_diff": 0.1176, "norm_loss": 0.0, "num_token_doc": 66.7235, "num_token_overlap": 11.6253, "num_token_query": 31.1728, "num_token_union": 64.9641, "num_word_context": 202.3199, "num_word_doc": 49.7829, "num_word_query": 23.1338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4155.1701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.299, "queue_k_norm": 1.4105, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1728, "sent_len_1": 66.7235, "sent_len_max_0": 127.4625, "sent_len_max_1": 189.7063, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8172, "doc_norm": 1.4113, "encoder_q-embeddings": 3458.2783, "encoder_q-layer.0": 2398.7805, "encoder_q-layer.1": 2592.0122, "encoder_q-layer.10": 2451.1218, "encoder_q-layer.11": 6047.9277, "encoder_q-layer.2": 3043.916, "encoder_q-layer.3": 3116.4248, "encoder_q-layer.4": 3495.1616, "encoder_q-layer.5": 3233.4924, "encoder_q-layer.6": 2954.1785, "encoder_q-layer.7": 2706.042, "encoder_q-layer.8": 2573.0811, "encoder_q-layer.9": 2335.5071, "epoch": 0.39, "inbatch_neg_score": 0.2557, "inbatch_pos_score": 0.8193, "learning_rate": 2.188888888888889e-05, "loss": 3.8172, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 66.9604, "num_token_overlap": 11.7088, "num_token_query": 31.3489, "num_token_union": 65.1597, "num_word_context": 202.2463, "num_word_doc": 49.9765, "num_word_query": 23.3004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4941.3004, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.3009, "queue_k_norm": 1.4105, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3489, "sent_len_1": 66.9604, "sent_len_max_0": 127.51, "sent_len_max_1": 189.5275, "stdk": 0.0476, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7961, "doc_norm": 1.4116, "encoder_q-embeddings": 3264.5369, "encoder_q-layer.0": 2257.3713, "encoder_q-layer.1": 2570.7266, "encoder_q-layer.10": 2379.4866, "encoder_q-layer.11": 5975.6782, "encoder_q-layer.2": 3140.7332, "encoder_q-layer.3": 3327.2288, "encoder_q-layer.4": 3333.3352, "encoder_q-layer.5": 3450.6704, "encoder_q-layer.6": 3213.9465, "encoder_q-layer.7": 2855.1675, "encoder_q-layer.8": 2613.2207, "encoder_q-layer.9": 2324.6099, "epoch": 0.4, "inbatch_neg_score": 0.2531, "inbatch_pos_score": 0.8135, "learning_rate": 2.1833333333333333e-05, "loss": 3.7961, "norm_diff": 0.13, "norm_loss": 0.0, "num_token_doc": 66.9599, "num_token_overlap": 11.7044, "num_token_query": 31.4826, "num_token_union": 65.2863, "num_word_context": 202.5921, "num_word_doc": 49.9851, "num_word_query": 23.3962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4857.2889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2534, "query_norm": 1.2816, "queue_k_norm": 1.4111, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4826, "sent_len_1": 66.9599, "sent_len_max_0": 127.415, "sent_len_max_1": 192.18, "stdk": 0.0477, "stdq": 0.0418, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8142, "doc_norm": 1.4099, "encoder_q-embeddings": 2399.3315, "encoder_q-layer.0": 1564.6041, "encoder_q-layer.1": 1695.4968, "encoder_q-layer.10": 2493.5083, "encoder_q-layer.11": 6327.9209, "encoder_q-layer.2": 1924.5112, "encoder_q-layer.3": 2040.2527, "encoder_q-layer.4": 2112.5337, "encoder_q-layer.5": 2261.2205, "encoder_q-layer.6": 2417.709, "encoder_q-layer.7": 2668.8123, "encoder_q-layer.8": 2741.8179, "encoder_q-layer.9": 2417.0371, "epoch": 0.4, "inbatch_neg_score": 0.247, "inbatch_pos_score": 0.7974, "learning_rate": 2.177777777777778e-05, "loss": 3.8142, "norm_diff": 0.1132, "norm_loss": 0.0, "num_token_doc": 66.8202, "num_token_overlap": 11.7033, "num_token_query": 31.5148, "num_token_union": 65.1831, "num_word_context": 202.3688, "num_word_doc": 49.8715, "num_word_query": 23.4278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4218.5875, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2483, "query_norm": 1.2967, "queue_k_norm": 1.4124, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5148, "sent_len_1": 66.8202, "sent_len_max_0": 127.53, "sent_len_max_1": 188.545, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7984, "doc_norm": 1.4082, "encoder_q-embeddings": 2938.2827, "encoder_q-layer.0": 2030.5565, "encoder_q-layer.1": 2260.8867, "encoder_q-layer.10": 2623.0237, "encoder_q-layer.11": 6117.3213, "encoder_q-layer.2": 2707.5364, "encoder_q-layer.3": 2868.5259, "encoder_q-layer.4": 3021.3752, "encoder_q-layer.5": 3139.2412, "encoder_q-layer.6": 3023.239, "encoder_q-layer.7": 2783.9067, "encoder_q-layer.8": 2890.7979, "encoder_q-layer.9": 2487.7673, "epoch": 0.4, "inbatch_neg_score": 0.252, "inbatch_pos_score": 0.8066, "learning_rate": 2.1722222222222225e-05, "loss": 3.7984, "norm_diff": 0.0993, "norm_loss": 0.0, "num_token_doc": 66.9912, "num_token_overlap": 11.7082, "num_token_query": 31.4661, "num_token_union": 65.2888, "num_word_context": 202.5681, "num_word_doc": 49.9821, "num_word_query": 23.3694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4676.0816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2517, "query_norm": 1.3089, "queue_k_norm": 1.4102, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4661, "sent_len_1": 66.9912, "sent_len_max_0": 127.365, "sent_len_max_1": 190.5362, "stdk": 0.0475, "stdq": 0.0428, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.8069, "doc_norm": 1.4046, "encoder_q-embeddings": 4358.231, "encoder_q-layer.0": 2910.5542, "encoder_q-layer.1": 3272.9094, "encoder_q-layer.10": 2360.3274, "encoder_q-layer.11": 5611.7129, "encoder_q-layer.2": 3665.9556, "encoder_q-layer.3": 3505.6824, "encoder_q-layer.4": 3421.2039, "encoder_q-layer.5": 3312.1877, "encoder_q-layer.6": 3139.1274, "encoder_q-layer.7": 2818.5325, "encoder_q-layer.8": 2530.0447, "encoder_q-layer.9": 2214.5796, "epoch": 0.4, "inbatch_neg_score": 0.25, "inbatch_pos_score": 0.8184, "learning_rate": 2.1666666666666667e-05, "loss": 3.8069, "norm_diff": 0.1065, "norm_loss": 0.0, "num_token_doc": 66.6816, "num_token_overlap": 11.6395, "num_token_query": 31.3213, "num_token_union": 65.0859, "num_word_context": 202.3442, "num_word_doc": 49.7242, "num_word_query": 23.2439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5247.9355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2505, "query_norm": 1.298, "queue_k_norm": 1.4105, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3213, "sent_len_1": 66.6816, "sent_len_max_0": 127.48, "sent_len_max_1": 189.2675, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8041, "doc_norm": 1.4166, "encoder_q-embeddings": 2217.6711, "encoder_q-layer.0": 1534.2561, "encoder_q-layer.1": 1672.6003, "encoder_q-layer.10": 2534.8162, "encoder_q-layer.11": 5707.3486, "encoder_q-layer.2": 1875.9373, "encoder_q-layer.3": 1953.0435, "encoder_q-layer.4": 2000.5262, "encoder_q-layer.5": 2090.2954, "encoder_q-layer.6": 2141.9146, "encoder_q-layer.7": 2099.7781, "encoder_q-layer.8": 2384.9233, "encoder_q-layer.9": 2283.3591, "epoch": 0.4, "inbatch_neg_score": 0.2502, "inbatch_pos_score": 0.8232, "learning_rate": 2.1611111111111113e-05, "loss": 3.8041, "norm_diff": 0.1168, "norm_loss": 0.0, "num_token_doc": 66.8864, "num_token_overlap": 11.6778, "num_token_query": 31.3613, "num_token_union": 65.1812, "num_word_context": 202.6062, "num_word_doc": 49.893, "num_word_query": 23.2878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3873.308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2505, "query_norm": 1.2998, "queue_k_norm": 1.4103, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3613, "sent_len_1": 66.8864, "sent_len_max_0": 127.4762, "sent_len_max_1": 188.39, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7993, "doc_norm": 1.4087, "encoder_q-embeddings": 4390.2349, "encoder_q-layer.0": 2931.1545, "encoder_q-layer.1": 3071.7739, "encoder_q-layer.10": 4453.9336, "encoder_q-layer.11": 11325.0137, "encoder_q-layer.2": 3530.1619, "encoder_q-layer.3": 3688.8894, "encoder_q-layer.4": 3751.5737, "encoder_q-layer.5": 4009.1975, "encoder_q-layer.6": 4390.9414, "encoder_q-layer.7": 4499.9702, "encoder_q-layer.8": 4909.9111, "encoder_q-layer.9": 4315.2886, "epoch": 0.4, "inbatch_neg_score": 0.2509, "inbatch_pos_score": 0.8086, "learning_rate": 2.1555555555555555e-05, "loss": 3.7993, "norm_diff": 0.1222, "norm_loss": 0.0, "num_token_doc": 66.7855, "num_token_overlap": 11.6784, "num_token_query": 31.3975, "num_token_union": 65.1368, "num_word_context": 202.1516, "num_word_doc": 49.8263, "num_word_query": 23.3194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7612.4975, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2505, "query_norm": 1.2864, "queue_k_norm": 1.4106, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3975, "sent_len_1": 66.7855, "sent_len_max_0": 127.4287, "sent_len_max_1": 190.2775, "stdk": 0.0476, "stdq": 0.0418, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 3.8, "doc_norm": 1.3976, "encoder_q-embeddings": 6408.376, "encoder_q-layer.0": 4715.3877, "encoder_q-layer.1": 5742.9722, "encoder_q-layer.10": 3002.4648, "encoder_q-layer.11": 6935.3496, "encoder_q-layer.2": 7115.8545, "encoder_q-layer.3": 7618.4609, "encoder_q-layer.4": 8430.0107, "encoder_q-layer.5": 6831.5586, "encoder_q-layer.6": 4764.6973, "encoder_q-layer.7": 4732.1064, "encoder_q-layer.8": 4184.3667, "encoder_q-layer.9": 2873.2744, "epoch": 0.4, "inbatch_neg_score": 0.2567, "inbatch_pos_score": 0.7778, "learning_rate": 2.15e-05, "loss": 3.8, "norm_diff": 0.0883, "norm_loss": 0.0, "num_token_doc": 66.7455, "num_token_overlap": 11.6945, "num_token_query": 31.5259, "num_token_union": 65.1786, "num_word_context": 201.7958, "num_word_doc": 49.7827, "num_word_query": 23.4088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8809.5651, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2549, "query_norm": 1.3093, "queue_k_norm": 1.4099, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5259, "sent_len_1": 66.7455, "sent_len_max_0": 127.5975, "sent_len_max_1": 189.105, "stdk": 0.0471, "stdq": 0.0426, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.81, "doc_norm": 1.4108, "encoder_q-embeddings": 2247.8911, "encoder_q-layer.0": 1491.5104, "encoder_q-layer.1": 1579.3837, "encoder_q-layer.10": 2495.8218, "encoder_q-layer.11": 6097.3481, "encoder_q-layer.2": 1833.0594, "encoder_q-layer.3": 1858.0437, "encoder_q-layer.4": 1985.2671, "encoder_q-layer.5": 2055.458, "encoder_q-layer.6": 2356.9644, "encoder_q-layer.7": 2451.282, "encoder_q-layer.8": 2613.3389, "encoder_q-layer.9": 2383.248, "epoch": 0.4, "inbatch_neg_score": 0.2571, "inbatch_pos_score": 0.814, "learning_rate": 2.1444444444444443e-05, "loss": 3.81, "norm_diff": 0.0978, "norm_loss": 0.0, "num_token_doc": 66.7872, "num_token_overlap": 11.7133, "num_token_query": 31.4661, "num_token_union": 65.1605, "num_word_context": 202.2726, "num_word_doc": 49.839, "num_word_query": 23.3807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4064.7717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2573, "query_norm": 1.313, "queue_k_norm": 1.4113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4661, "sent_len_1": 66.7872, "sent_len_max_0": 127.4813, "sent_len_max_1": 191.3137, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8173, "doc_norm": 1.4077, "encoder_q-embeddings": 3310.895, "encoder_q-layer.0": 2417.6702, "encoder_q-layer.1": 2614.9226, "encoder_q-layer.10": 2573.2673, "encoder_q-layer.11": 6255.1338, "encoder_q-layer.2": 2918.5864, "encoder_q-layer.3": 2802.187, "encoder_q-layer.4": 2881.4331, "encoder_q-layer.5": 2911.314, "encoder_q-layer.6": 3363.1479, "encoder_q-layer.7": 3085.0576, "encoder_q-layer.8": 2775.0356, "encoder_q-layer.9": 2412.5771, "epoch": 0.4, "inbatch_neg_score": 0.2582, "inbatch_pos_score": 0.8237, "learning_rate": 2.138888888888889e-05, "loss": 3.8173, "norm_diff": 0.1007, "norm_loss": 0.0, "num_token_doc": 66.9042, "num_token_overlap": 11.7102, "num_token_query": 31.409, "num_token_union": 65.1471, "num_word_context": 202.2709, "num_word_doc": 49.9035, "num_word_query": 23.321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4961.6374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.307, "queue_k_norm": 1.4113, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.409, "sent_len_1": 66.9042, "sent_len_max_0": 127.535, "sent_len_max_1": 190.8512, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.8044, "doc_norm": 1.4083, "encoder_q-embeddings": 2476.2061, "encoder_q-layer.0": 1648.9816, "encoder_q-layer.1": 1683.6123, "encoder_q-layer.10": 2837.4897, "encoder_q-layer.11": 6522.7861, "encoder_q-layer.2": 1883.0741, "encoder_q-layer.3": 1972.1676, "encoder_q-layer.4": 1952.8647, "encoder_q-layer.5": 2111.697, "encoder_q-layer.6": 2313.2595, "encoder_q-layer.7": 2410.6458, "encoder_q-layer.8": 2759.5498, "encoder_q-layer.9": 2511.0903, "epoch": 0.4, "inbatch_neg_score": 0.2607, "inbatch_pos_score": 0.8096, "learning_rate": 2.1333333333333335e-05, "loss": 3.8044, "norm_diff": 0.1059, "norm_loss": 0.0, "num_token_doc": 67.0137, "num_token_overlap": 11.7123, "num_token_query": 31.4161, "num_token_union": 65.2617, "num_word_context": 202.6231, "num_word_doc": 50.0338, "num_word_query": 23.3313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4303.2947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2615, "query_norm": 1.3024, "queue_k_norm": 1.4122, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4161, "sent_len_1": 67.0137, "sent_len_max_0": 127.4062, "sent_len_max_1": 189.435, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7922, "doc_norm": 1.4143, "encoder_q-embeddings": 3335.8865, "encoder_q-layer.0": 2258.7661, "encoder_q-layer.1": 2587.1958, "encoder_q-layer.10": 2434.5933, "encoder_q-layer.11": 6132.4839, "encoder_q-layer.2": 2827.21, "encoder_q-layer.3": 2655.5679, "encoder_q-layer.4": 2735.6807, "encoder_q-layer.5": 2835.5657, "encoder_q-layer.6": 2828.5198, "encoder_q-layer.7": 2730.4822, "encoder_q-layer.8": 2753.2375, "encoder_q-layer.9": 2356.0735, "epoch": 0.4, "inbatch_neg_score": 0.2642, "inbatch_pos_score": 0.8232, "learning_rate": 2.127777777777778e-05, "loss": 3.7922, "norm_diff": 0.1033, "norm_loss": 0.0, "num_token_doc": 66.6673, "num_token_overlap": 11.7287, "num_token_query": 31.5554, "num_token_union": 65.1391, "num_word_context": 202.4316, "num_word_doc": 49.7715, "num_word_query": 23.4771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4727.6303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2642, "query_norm": 1.311, "queue_k_norm": 1.4099, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5554, "sent_len_1": 66.6673, "sent_len_max_0": 127.5138, "sent_len_max_1": 190.5037, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.7874, "doc_norm": 1.4035, "encoder_q-embeddings": 5990.0039, "encoder_q-layer.0": 4213.2676, "encoder_q-layer.1": 4924.9082, "encoder_q-layer.10": 1240.7346, "encoder_q-layer.11": 3246.675, "encoder_q-layer.2": 6317.0586, "encoder_q-layer.3": 7192.0601, "encoder_q-layer.4": 7136.4287, "encoder_q-layer.5": 6507.6187, "encoder_q-layer.6": 4971.4766, "encoder_q-layer.7": 3208.6785, "encoder_q-layer.8": 1668.9287, "encoder_q-layer.9": 1287.514, "epoch": 0.4, "inbatch_neg_score": 0.2658, "inbatch_pos_score": 0.8208, "learning_rate": 2.1222222222222223e-05, "loss": 3.7874, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.6951, "num_token_overlap": 11.6872, "num_token_query": 31.4165, "num_token_union": 65.1098, "num_word_context": 202.2817, "num_word_doc": 49.7955, "num_word_query": 23.342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7492.1146, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2659, "query_norm": 1.3095, "queue_k_norm": 1.4121, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4165, "sent_len_1": 66.6951, "sent_len_max_0": 127.3137, "sent_len_max_1": 187.5425, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8028, "doc_norm": 1.4094, "encoder_q-embeddings": 1908.7507, "encoder_q-layer.0": 1359.9218, "encoder_q-layer.1": 1471.9208, "encoder_q-layer.10": 1222.9937, "encoder_q-layer.11": 3065.1028, "encoder_q-layer.2": 1826.3425, "encoder_q-layer.3": 2028.2739, "encoder_q-layer.4": 2009.9047, "encoder_q-layer.5": 1783.4745, "encoder_q-layer.6": 1758.6356, "encoder_q-layer.7": 1648.244, "encoder_q-layer.8": 1547.6851, "encoder_q-layer.9": 1297.3483, "epoch": 0.4, "inbatch_neg_score": 0.2735, "inbatch_pos_score": 0.812, "learning_rate": 2.116666666666667e-05, "loss": 3.8028, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.7685, "num_token_overlap": 11.6892, "num_token_query": 31.5334, "num_token_union": 65.2133, "num_word_context": 202.2179, "num_word_doc": 49.8132, "num_word_query": 23.4147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2774.9233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.3141, "queue_k_norm": 1.4088, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5334, "sent_len_1": 66.7685, "sent_len_max_0": 127.5375, "sent_len_max_1": 189.9137, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8112, "doc_norm": 1.4161, "encoder_q-embeddings": 1118.458, "encoder_q-layer.0": 746.6039, "encoder_q-layer.1": 838.1993, "encoder_q-layer.10": 1239.4928, "encoder_q-layer.11": 3108.1323, "encoder_q-layer.2": 910.2305, "encoder_q-layer.3": 904.2726, "encoder_q-layer.4": 895.4504, "encoder_q-layer.5": 933.8599, "encoder_q-layer.6": 1037.6588, "encoder_q-layer.7": 1130.4019, "encoder_q-layer.8": 1304.4152, "encoder_q-layer.9": 1237.2339, "epoch": 0.4, "inbatch_neg_score": 0.2789, "inbatch_pos_score": 0.8516, "learning_rate": 2.111111111111111e-05, "loss": 3.8112, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.6475, "num_token_overlap": 11.6803, "num_token_query": 31.3661, "num_token_union": 65.0074, "num_word_context": 202.2457, "num_word_doc": 49.7361, "num_word_query": 23.3135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2027.5503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.3412, "queue_k_norm": 1.4113, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3661, "sent_len_1": 66.6475, "sent_len_max_0": 127.5037, "sent_len_max_1": 188.2937, "stdk": 0.0478, "stdq": 0.0431, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 3.8068, "doc_norm": 1.4139, "encoder_q-embeddings": 2657.1941, "encoder_q-layer.0": 1978.2023, "encoder_q-layer.1": 2202.012, "encoder_q-layer.10": 1302.9061, "encoder_q-layer.11": 3138.1191, "encoder_q-layer.2": 2684.585, "encoder_q-layer.3": 2837.9922, "encoder_q-layer.4": 2797.5237, "encoder_q-layer.5": 2572.7756, "encoder_q-layer.6": 2215.0674, "encoder_q-layer.7": 2028.9843, "encoder_q-layer.8": 1433.6959, "encoder_q-layer.9": 1259.3236, "epoch": 0.4, "inbatch_neg_score": 0.2828, "inbatch_pos_score": 0.8281, "learning_rate": 2.1055555555555556e-05, "loss": 3.8068, "norm_diff": 0.0965, "norm_loss": 0.0, "num_token_doc": 66.5027, "num_token_overlap": 11.6389, "num_token_query": 31.4048, "num_token_union": 64.9838, "num_word_context": 202.1815, "num_word_doc": 49.6184, "num_word_query": 23.3247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3508.469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.3174, "queue_k_norm": 1.4127, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4048, "sent_len_1": 66.5027, "sent_len_max_0": 127.575, "sent_len_max_1": 189.57, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8182, "doc_norm": 1.4153, "encoder_q-embeddings": 1745.0096, "encoder_q-layer.0": 1252.4685, "encoder_q-layer.1": 1340.2261, "encoder_q-layer.10": 1203.6021, "encoder_q-layer.11": 3168.8804, "encoder_q-layer.2": 1617.4398, "encoder_q-layer.3": 1671.5507, "encoder_q-layer.4": 1621.1682, "encoder_q-layer.5": 1588.1096, "encoder_q-layer.6": 1464.1698, "encoder_q-layer.7": 1426.1619, "encoder_q-layer.8": 1460.7592, "encoder_q-layer.9": 1245.1664, "epoch": 0.4, "inbatch_neg_score": 0.2851, "inbatch_pos_score": 0.834, "learning_rate": 2.1e-05, "loss": 3.8182, "norm_diff": 0.0837, "norm_loss": 0.0, "num_token_doc": 66.8611, "num_token_overlap": 11.64, "num_token_query": 31.4773, "num_token_union": 65.2835, "num_word_context": 202.527, "num_word_doc": 49.8385, "num_word_query": 23.3508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2503.0358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3315, "queue_k_norm": 1.4141, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4773, "sent_len_1": 66.8611, "sent_len_max_0": 127.3287, "sent_len_max_1": 190.8738, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.8048, "doc_norm": 1.4203, "encoder_q-embeddings": 1164.8876, "encoder_q-layer.0": 782.2957, "encoder_q-layer.1": 861.9965, "encoder_q-layer.10": 1191.2971, "encoder_q-layer.11": 3084.4358, "encoder_q-layer.2": 1006.8613, "encoder_q-layer.3": 1054.7142, "encoder_q-layer.4": 1145.0327, "encoder_q-layer.5": 1115.0569, "encoder_q-layer.6": 1154.6172, "encoder_q-layer.7": 1227.1482, "encoder_q-layer.8": 1368.5636, "encoder_q-layer.9": 1192.7249, "epoch": 0.41, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.8545, "learning_rate": 2.0944444444444445e-05, "loss": 3.8048, "norm_diff": 0.0916, "norm_loss": 0.0, "num_token_doc": 66.5998, "num_token_overlap": 11.6318, "num_token_query": 31.3595, "num_token_union": 65.0525, "num_word_context": 202.4188, "num_word_doc": 49.6688, "num_word_query": 23.2858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2106.8125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.3287, "queue_k_norm": 1.4128, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3595, "sent_len_1": 66.5998, "sent_len_max_0": 127.365, "sent_len_max_1": 190.4162, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.8115, "doc_norm": 1.4197, "encoder_q-embeddings": 1333.5704, "encoder_q-layer.0": 837.8602, "encoder_q-layer.1": 903.1711, "encoder_q-layer.10": 1299.9216, "encoder_q-layer.11": 3201.4827, "encoder_q-layer.2": 1001.3248, "encoder_q-layer.3": 1069.6241, "encoder_q-layer.4": 1217.3208, "encoder_q-layer.5": 1228.0936, "encoder_q-layer.6": 1336.1011, "encoder_q-layer.7": 1349.1185, "encoder_q-layer.8": 1397.8206, "encoder_q-layer.9": 1319.611, "epoch": 0.41, "inbatch_neg_score": 0.2889, "inbatch_pos_score": 0.8369, "learning_rate": 2.088888888888889e-05, "loss": 3.8115, "norm_diff": 0.0865, "norm_loss": 0.0, "num_token_doc": 66.7019, "num_token_overlap": 11.692, "num_token_query": 31.4565, "num_token_union": 65.1379, "num_word_context": 202.4462, "num_word_doc": 49.8149, "num_word_query": 23.385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2208.8165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2878, "query_norm": 1.3332, "queue_k_norm": 1.4154, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4565, "sent_len_1": 66.7019, "sent_len_max_0": 127.5975, "sent_len_max_1": 187.99, "stdk": 0.0478, "stdq": 0.0427, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8039, "doc_norm": 1.4163, "encoder_q-embeddings": 1283.5164, "encoder_q-layer.0": 868.0697, "encoder_q-layer.1": 929.5142, "encoder_q-layer.10": 1233.4857, "encoder_q-layer.11": 2918.8455, "encoder_q-layer.2": 1061.3086, "encoder_q-layer.3": 1088.0322, "encoder_q-layer.4": 1067.7495, "encoder_q-layer.5": 1060.4971, "encoder_q-layer.6": 1078.8448, "encoder_q-layer.7": 1079.7446, "encoder_q-layer.8": 1181.7852, "encoder_q-layer.9": 1109.0812, "epoch": 0.41, "inbatch_neg_score": 0.2915, "inbatch_pos_score": 0.8379, "learning_rate": 2.0833333333333336e-05, "loss": 3.8039, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 66.9889, "num_token_overlap": 11.6548, "num_token_query": 31.3581, "num_token_union": 65.2461, "num_word_context": 202.5288, "num_word_doc": 49.9517, "num_word_query": 23.2867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2027.3459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.3203, "queue_k_norm": 1.4181, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3581, "sent_len_1": 66.9889, "sent_len_max_0": 127.4412, "sent_len_max_1": 191.0588, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.7974, "doc_norm": 1.4138, "encoder_q-embeddings": 1229.3251, "encoder_q-layer.0": 804.437, "encoder_q-layer.1": 870.3751, "encoder_q-layer.10": 1260.6743, "encoder_q-layer.11": 3085.7324, "encoder_q-layer.2": 1002.1608, "encoder_q-layer.3": 1072.2041, "encoder_q-layer.4": 1140.786, "encoder_q-layer.5": 1101.8088, "encoder_q-layer.6": 1057.2189, "encoder_q-layer.7": 1151.9218, "encoder_q-layer.8": 1349.7843, "encoder_q-layer.9": 1243.8241, "epoch": 0.41, "inbatch_neg_score": 0.2901, "inbatch_pos_score": 0.8379, "learning_rate": 2.077777777777778e-05, "loss": 3.7974, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.5896, "num_token_overlap": 11.6633, "num_token_query": 31.4722, "num_token_union": 65.0892, "num_word_context": 202.4919, "num_word_doc": 49.716, "num_word_query": 23.3711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2082.587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2888, "query_norm": 1.3185, "queue_k_norm": 1.4177, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4722, "sent_len_1": 66.5896, "sent_len_max_0": 127.4788, "sent_len_max_1": 189.2138, "stdk": 0.0475, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.8074, "doc_norm": 1.4033, "encoder_q-embeddings": 1377.3396, "encoder_q-layer.0": 979.3058, "encoder_q-layer.1": 985.6123, "encoder_q-layer.10": 1259.2695, "encoder_q-layer.11": 3114.5034, "encoder_q-layer.2": 1108.0289, "encoder_q-layer.3": 1121.3248, "encoder_q-layer.4": 1098.9049, "encoder_q-layer.5": 1099.0643, "encoder_q-layer.6": 1091.1802, "encoder_q-layer.7": 1243.5671, "encoder_q-layer.8": 1278.6074, "encoder_q-layer.9": 1205.006, "epoch": 0.41, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.8472, "learning_rate": 2.0722222222222224e-05, "loss": 3.8074, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.358, "num_token_overlap": 11.7179, "num_token_query": 31.5048, "num_token_union": 64.9126, "num_word_context": 202.0539, "num_word_doc": 49.5107, "num_word_query": 23.4019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2167.4156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.308, "queue_k_norm": 1.4163, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5048, "sent_len_1": 66.358, "sent_len_max_0": 127.4075, "sent_len_max_1": 189.7988, "stdk": 0.0471, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.8028, "doc_norm": 1.4222, "encoder_q-embeddings": 2011.5327, "encoder_q-layer.0": 1368.0166, "encoder_q-layer.1": 1531.9084, "encoder_q-layer.10": 1288.0643, "encoder_q-layer.11": 3171.3372, "encoder_q-layer.2": 1718.0741, "encoder_q-layer.3": 1783.6648, "encoder_q-layer.4": 1923.5486, "encoder_q-layer.5": 2050.3564, "encoder_q-layer.6": 1938.7942, "encoder_q-layer.7": 2001.9343, "encoder_q-layer.8": 1895.0393, "encoder_q-layer.9": 1359.4843, "epoch": 0.41, "inbatch_neg_score": 0.2898, "inbatch_pos_score": 0.855, "learning_rate": 2.0666666666666666e-05, "loss": 3.8028, "norm_diff": 0.1169, "norm_loss": 0.0, "num_token_doc": 66.6848, "num_token_overlap": 11.658, "num_token_query": 31.3899, "num_token_union": 65.1198, "num_word_context": 202.1784, "num_word_doc": 49.7867, "num_word_query": 23.3391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2871.7757, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.3053, "queue_k_norm": 1.4192, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3899, "sent_len_1": 66.6848, "sent_len_max_0": 127.4737, "sent_len_max_1": 187.2488, "stdk": 0.0478, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7895, "doc_norm": 1.424, "encoder_q-embeddings": 2426.2268, "encoder_q-layer.0": 1539.1703, "encoder_q-layer.1": 1835.8755, "encoder_q-layer.10": 1265.5282, "encoder_q-layer.11": 3052.345, "encoder_q-layer.2": 2019.3555, "encoder_q-layer.3": 1766.6973, "encoder_q-layer.4": 1636.6019, "encoder_q-layer.5": 1556.3527, "encoder_q-layer.6": 1454.8519, "encoder_q-layer.7": 1419.4684, "encoder_q-layer.8": 1458.0374, "encoder_q-layer.9": 1212.7643, "epoch": 0.41, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.8594, "learning_rate": 2.0611111111111112e-05, "loss": 3.7895, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.5621, "num_token_overlap": 11.6786, "num_token_query": 31.554, "num_token_union": 65.1448, "num_word_context": 202.3666, "num_word_doc": 49.6615, "num_word_query": 23.4264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2826.4375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.291, "query_norm": 1.3148, "queue_k_norm": 1.4188, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.554, "sent_len_1": 66.5621, "sent_len_max_0": 127.455, "sent_len_max_1": 189.9162, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7868, "doc_norm": 1.4146, "encoder_q-embeddings": 1172.4264, "encoder_q-layer.0": 800.877, "encoder_q-layer.1": 834.3101, "encoder_q-layer.10": 1209.7931, "encoder_q-layer.11": 3040.1104, "encoder_q-layer.2": 956.6046, "encoder_q-layer.3": 962.1185, "encoder_q-layer.4": 1011.7999, "encoder_q-layer.5": 1016.251, "encoder_q-layer.6": 1052.092, "encoder_q-layer.7": 1083.7427, "encoder_q-layer.8": 1234.6118, "encoder_q-layer.9": 1132.9275, "epoch": 0.41, "inbatch_neg_score": 0.2914, "inbatch_pos_score": 0.8433, "learning_rate": 2.0555555555555555e-05, "loss": 3.7868, "norm_diff": 0.1053, "norm_loss": 0.0, "num_token_doc": 66.7057, "num_token_overlap": 11.632, "num_token_query": 31.2081, "num_token_union": 65.0296, "num_word_context": 202.2456, "num_word_doc": 49.7809, "num_word_query": 23.1921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2016.9892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.3092, "queue_k_norm": 1.4212, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2081, "sent_len_1": 66.7057, "sent_len_max_0": 127.2013, "sent_len_max_1": 188.8175, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8093, "doc_norm": 1.4122, "encoder_q-embeddings": 1366.6429, "encoder_q-layer.0": 907.0641, "encoder_q-layer.1": 1005.9563, "encoder_q-layer.10": 1202.5354, "encoder_q-layer.11": 3096.1873, "encoder_q-layer.2": 1109.0701, "encoder_q-layer.3": 1134.5233, "encoder_q-layer.4": 1146.3127, "encoder_q-layer.5": 1085.5413, "encoder_q-layer.6": 1135.2289, "encoder_q-layer.7": 1174.7319, "encoder_q-layer.8": 1272.8491, "encoder_q-layer.9": 1187.1783, "epoch": 0.41, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 0.8418, "learning_rate": 2.05e-05, "loss": 3.8093, "norm_diff": 0.1045, "norm_loss": 0.0, "num_token_doc": 66.5729, "num_token_overlap": 11.6501, "num_token_query": 31.2538, "num_token_union": 64.9642, "num_word_context": 201.9549, "num_word_doc": 49.6341, "num_word_query": 23.2026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2156.3371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.3077, "queue_k_norm": 1.4206, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2538, "sent_len_1": 66.5729, "sent_len_max_0": 127.4887, "sent_len_max_1": 189.8913, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.7813, "doc_norm": 1.4194, "encoder_q-embeddings": 1527.0605, "encoder_q-layer.0": 1070.1025, "encoder_q-layer.1": 1156.6312, "encoder_q-layer.10": 1223.5449, "encoder_q-layer.11": 3095.0872, "encoder_q-layer.2": 1386.1615, "encoder_q-layer.3": 1410.6241, "encoder_q-layer.4": 1403.2496, "encoder_q-layer.5": 1259.9768, "encoder_q-layer.6": 1279.3145, "encoder_q-layer.7": 1248.4664, "encoder_q-layer.8": 1255.6694, "encoder_q-layer.9": 1153.1106, "epoch": 0.41, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 0.8306, "learning_rate": 2.0444444444444446e-05, "loss": 3.7813, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 11.681, "num_token_query": 31.499, "num_token_union": 65.2615, "num_word_context": 202.4113, "num_word_doc": 49.9063, "num_word_query": 23.3707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2315.5342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3054, "queue_k_norm": 1.4226, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.499, "sent_len_1": 66.8337, "sent_len_max_0": 127.4725, "sent_len_max_1": 188.2337, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7956, "doc_norm": 1.4251, "encoder_q-embeddings": 1161.9812, "encoder_q-layer.0": 779.4741, "encoder_q-layer.1": 810.2249, "encoder_q-layer.10": 1219.5367, "encoder_q-layer.11": 2916.7437, "encoder_q-layer.2": 890.9628, "encoder_q-layer.3": 924.7909, "encoder_q-layer.4": 961.4735, "encoder_q-layer.5": 984.6244, "encoder_q-layer.6": 1024.9719, "encoder_q-layer.7": 1179.8828, "encoder_q-layer.8": 1294.1447, "encoder_q-layer.9": 1077.2767, "epoch": 0.41, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.874, "learning_rate": 2.0388888888888892e-05, "loss": 3.7956, "norm_diff": 0.102, "norm_loss": 0.0, "num_token_doc": 66.8138, "num_token_overlap": 11.7386, "num_token_query": 31.569, "num_token_union": 65.2109, "num_word_context": 202.2, "num_word_doc": 49.8395, "num_word_query": 23.4477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1980.8669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.3232, "queue_k_norm": 1.4238, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.569, "sent_len_1": 66.8138, "sent_len_max_0": 127.565, "sent_len_max_1": 190.04, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.8128, "doc_norm": 1.4199, "encoder_q-embeddings": 1096.1158, "encoder_q-layer.0": 692.8481, "encoder_q-layer.1": 735.5238, "encoder_q-layer.10": 1249.9591, "encoder_q-layer.11": 3042.4436, "encoder_q-layer.2": 825.5258, "encoder_q-layer.3": 857.165, "encoder_q-layer.4": 900.6885, "encoder_q-layer.5": 888.6927, "encoder_q-layer.6": 1041.3657, "encoder_q-layer.7": 1123.0299, "encoder_q-layer.8": 1280.6101, "encoder_q-layer.9": 1199.9962, "epoch": 0.41, "inbatch_neg_score": 0.2947, "inbatch_pos_score": 0.8467, "learning_rate": 2.0333333333333334e-05, "loss": 3.8128, "norm_diff": 0.1116, "norm_loss": 0.0, "num_token_doc": 66.8013, "num_token_overlap": 11.6132, "num_token_query": 31.1266, "num_token_union": 65.0255, "num_word_context": 202.099, "num_word_doc": 49.8323, "num_word_query": 23.117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.6284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3083, "queue_k_norm": 1.4222, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.1266, "sent_len_1": 66.8013, "sent_len_max_0": 127.3787, "sent_len_max_1": 191.0387, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7983, "doc_norm": 1.4284, "encoder_q-embeddings": 1308.4896, "encoder_q-layer.0": 857.7935, "encoder_q-layer.1": 902.8447, "encoder_q-layer.10": 1245.4642, "encoder_q-layer.11": 3165.6929, "encoder_q-layer.2": 1048.3082, "encoder_q-layer.3": 1103.4186, "encoder_q-layer.4": 1118.1613, "encoder_q-layer.5": 1145.6299, "encoder_q-layer.6": 1180.8448, "encoder_q-layer.7": 1207.9706, "encoder_q-layer.8": 1318.2325, "encoder_q-layer.9": 1188.2233, "epoch": 0.41, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 0.8486, "learning_rate": 2.027777777777778e-05, "loss": 3.7983, "norm_diff": 0.1157, "norm_loss": 0.0, "num_token_doc": 66.7895, "num_token_overlap": 11.6728, "num_token_query": 31.274, "num_token_union": 65.1015, "num_word_context": 202.2667, "num_word_doc": 49.8444, "num_word_query": 23.2214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2133.1209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.3127, "queue_k_norm": 1.4243, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.274, "sent_len_1": 66.7895, "sent_len_max_0": 127.5662, "sent_len_max_1": 189.3025, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7745, "doc_norm": 1.4182, "encoder_q-embeddings": 4893.8984, "encoder_q-layer.0": 3395.5342, "encoder_q-layer.1": 3677.2532, "encoder_q-layer.10": 1340.6061, "encoder_q-layer.11": 3218.1284, "encoder_q-layer.2": 4451.4019, "encoder_q-layer.3": 4782.2915, "encoder_q-layer.4": 5030.291, "encoder_q-layer.5": 5777.8833, "encoder_q-layer.6": 5002.0474, "encoder_q-layer.7": 5213.085, "encoder_q-layer.8": 3468.1084, "encoder_q-layer.9": 1615.803, "epoch": 0.41, "inbatch_neg_score": 0.2875, "inbatch_pos_score": 0.8457, "learning_rate": 2.0222222222222222e-05, "loss": 3.7745, "norm_diff": 0.0903, "norm_loss": 0.0, "num_token_doc": 66.8082, "num_token_overlap": 11.665, "num_token_query": 31.3383, "num_token_union": 65.1524, "num_word_context": 202.075, "num_word_doc": 49.8301, "num_word_query": 23.2641, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6352.3245, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2874, "query_norm": 1.3279, "queue_k_norm": 1.425, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3383, "sent_len_1": 66.8082, "sent_len_max_0": 127.4838, "sent_len_max_1": 190.5075, "stdk": 0.0475, "stdq": 0.043, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8135, "doc_norm": 1.4229, "encoder_q-embeddings": 2192.5327, "encoder_q-layer.0": 1490.5995, "encoder_q-layer.1": 1560.4744, "encoder_q-layer.10": 1212.0454, "encoder_q-layer.11": 2999.8237, "encoder_q-layer.2": 1627.8809, "encoder_q-layer.3": 1784.6908, "encoder_q-layer.4": 1739.0148, "encoder_q-layer.5": 1566.6959, "encoder_q-layer.6": 1453.1786, "encoder_q-layer.7": 1459.714, "encoder_q-layer.8": 1347.7375, "encoder_q-layer.9": 1155.6226, "epoch": 0.41, "inbatch_neg_score": 0.2839, "inbatch_pos_score": 0.8325, "learning_rate": 2.0166666666666668e-05, "loss": 3.8135, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.7704, "num_token_overlap": 11.6285, "num_token_query": 31.3294, "num_token_union": 65.1101, "num_word_context": 202.152, "num_word_doc": 49.7893, "num_word_query": 23.2512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2668.3656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3006, "queue_k_norm": 1.4241, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3294, "sent_len_1": 66.7704, "sent_len_max_0": 127.5162, "sent_len_max_1": 189.6513, "stdk": 0.0477, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.8013, "doc_norm": 1.4269, "encoder_q-embeddings": 2213.3062, "encoder_q-layer.0": 1508.6538, "encoder_q-layer.1": 1659.8252, "encoder_q-layer.10": 2364.7385, "encoder_q-layer.11": 5712.8843, "encoder_q-layer.2": 1936.5957, "encoder_q-layer.3": 1894.5079, "encoder_q-layer.4": 2086.3032, "encoder_q-layer.5": 2149.4746, "encoder_q-layer.6": 2111.5535, "encoder_q-layer.7": 2406.4893, "encoder_q-layer.8": 2452.114, "encoder_q-layer.9": 2208.4927, "epoch": 0.42, "inbatch_neg_score": 0.2893, "inbatch_pos_score": 0.8564, "learning_rate": 2.011111111111111e-05, "loss": 3.8013, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.6895, "num_token_overlap": 11.7377, "num_token_query": 31.5098, "num_token_union": 65.1188, "num_word_context": 202.3597, "num_word_doc": 49.8144, "num_word_query": 23.4138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3920.4704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.3075, "queue_k_norm": 1.4243, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5098, "sent_len_1": 66.6895, "sent_len_max_0": 127.525, "sent_len_max_1": 187.6925, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7896, "doc_norm": 1.4222, "encoder_q-embeddings": 3543.5146, "encoder_q-layer.0": 2651.8601, "encoder_q-layer.1": 2679.302, "encoder_q-layer.10": 2553.9966, "encoder_q-layer.11": 6058.0547, "encoder_q-layer.2": 2763.7236, "encoder_q-layer.3": 2504.6987, "encoder_q-layer.4": 2583.9053, "encoder_q-layer.5": 2255.3977, "encoder_q-layer.6": 2222.0112, "encoder_q-layer.7": 2469.1602, "encoder_q-layer.8": 2668.7668, "encoder_q-layer.9": 2340.8879, "epoch": 0.42, "inbatch_neg_score": 0.2837, "inbatch_pos_score": 0.8398, "learning_rate": 2.0055555555555556e-05, "loss": 3.7896, "norm_diff": 0.1048, "norm_loss": 0.0, "num_token_doc": 67.0054, "num_token_overlap": 11.6561, "num_token_query": 31.2988, "num_token_union": 65.2406, "num_word_context": 202.5457, "num_word_doc": 49.998, "num_word_query": 23.2595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4631.6851, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2847, "query_norm": 1.3174, "queue_k_norm": 1.424, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2988, "sent_len_1": 67.0054, "sent_len_max_0": 127.5037, "sent_len_max_1": 191.0888, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8043, "doc_norm": 1.4299, "encoder_q-embeddings": 3275.9316, "encoder_q-layer.0": 2303.4211, "encoder_q-layer.1": 2664.0645, "encoder_q-layer.10": 2629.0044, "encoder_q-layer.11": 6244.8252, "encoder_q-layer.2": 3232.1965, "encoder_q-layer.3": 3230.1958, "encoder_q-layer.4": 3247.0066, "encoder_q-layer.5": 3116.3469, "encoder_q-layer.6": 2840.8474, "encoder_q-layer.7": 2832.6414, "encoder_q-layer.8": 2869.0139, "encoder_q-layer.9": 2382.6528, "epoch": 0.42, "inbatch_neg_score": 0.2833, "inbatch_pos_score": 0.8433, "learning_rate": 2e-05, "loss": 3.8043, "norm_diff": 0.1204, "norm_loss": 0.0, "num_token_doc": 66.871, "num_token_overlap": 11.6419, "num_token_query": 31.2075, "num_token_union": 65.0707, "num_word_context": 202.441, "num_word_doc": 49.909, "num_word_query": 23.1609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4972.8042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.3095, "queue_k_norm": 1.4244, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2075, "sent_len_1": 66.871, "sent_len_max_0": 127.505, "sent_len_max_1": 188.2887, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8088, "doc_norm": 1.4272, "encoder_q-embeddings": 2507.0469, "encoder_q-layer.0": 1654.0919, "encoder_q-layer.1": 1755.5347, "encoder_q-layer.10": 2487.0879, "encoder_q-layer.11": 6126.3096, "encoder_q-layer.2": 2020.7877, "encoder_q-layer.3": 2130.5332, "encoder_q-layer.4": 2157.5115, "encoder_q-layer.5": 2215.5879, "encoder_q-layer.6": 2343.1262, "encoder_q-layer.7": 2442.3416, "encoder_q-layer.8": 2547.582, "encoder_q-layer.9": 2265.1387, "epoch": 0.42, "inbatch_neg_score": 0.2849, "inbatch_pos_score": 0.8374, "learning_rate": 1.9944444444444447e-05, "loss": 3.8088, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.5434, "num_token_overlap": 11.6647, "num_token_query": 31.4028, "num_token_union": 65.0018, "num_word_context": 202.3686, "num_word_doc": 49.6989, "num_word_query": 23.3327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4175.2726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.3123, "queue_k_norm": 1.4234, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4028, "sent_len_1": 66.5434, "sent_len_max_0": 127.485, "sent_len_max_1": 188.4025, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.786, "doc_norm": 1.4188, "encoder_q-embeddings": 2094.176, "encoder_q-layer.0": 1412.7231, "encoder_q-layer.1": 1444.5062, "encoder_q-layer.10": 2362.8423, "encoder_q-layer.11": 5926.1797, "encoder_q-layer.2": 1682.634, "encoder_q-layer.3": 1727.9148, "encoder_q-layer.4": 1747.4442, "encoder_q-layer.5": 1713.0, "encoder_q-layer.6": 1883.4766, "encoder_q-layer.7": 2038.9723, "encoder_q-layer.8": 2534.7046, "encoder_q-layer.9": 2304.0933, "epoch": 0.42, "inbatch_neg_score": 0.2861, "inbatch_pos_score": 0.8501, "learning_rate": 1.988888888888889e-05, "loss": 3.786, "norm_diff": 0.1068, "norm_loss": 0.0, "num_token_doc": 66.9146, "num_token_overlap": 11.6819, "num_token_query": 31.409, "num_token_union": 65.2117, "num_word_context": 202.0959, "num_word_doc": 49.9088, "num_word_query": 23.3111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3818.9096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.312, "queue_k_norm": 1.4241, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.409, "sent_len_1": 66.9146, "sent_len_max_0": 127.4975, "sent_len_max_1": 189.2038, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.8005, "doc_norm": 1.4253, "encoder_q-embeddings": 3378.0369, "encoder_q-layer.0": 2268.5178, "encoder_q-layer.1": 2284.2561, "encoder_q-layer.10": 2663.9268, "encoder_q-layer.11": 6266.0859, "encoder_q-layer.2": 2798.6055, "encoder_q-layer.3": 2820.1877, "encoder_q-layer.4": 2850.293, "encoder_q-layer.5": 2756.084, "encoder_q-layer.6": 2750.4519, "encoder_q-layer.7": 2722.2585, "encoder_q-layer.8": 2800.9714, "encoder_q-layer.9": 2499.2947, "epoch": 0.42, "inbatch_neg_score": 0.2853, "inbatch_pos_score": 0.8647, "learning_rate": 1.9833333333333335e-05, "loss": 3.8005, "norm_diff": 0.1174, "norm_loss": 0.0, "num_token_doc": 66.7871, "num_token_overlap": 11.6718, "num_token_query": 31.3707, "num_token_union": 65.1337, "num_word_context": 202.5746, "num_word_doc": 49.8408, "num_word_query": 23.2907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4854.1423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.3079, "queue_k_norm": 1.4219, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3707, "sent_len_1": 66.7871, "sent_len_max_0": 127.4038, "sent_len_max_1": 189.085, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7859, "doc_norm": 1.4256, "encoder_q-embeddings": 2416.8398, "encoder_q-layer.0": 1598.0923, "encoder_q-layer.1": 1670.479, "encoder_q-layer.10": 2438.9038, "encoder_q-layer.11": 6062.7676, "encoder_q-layer.2": 1887.5062, "encoder_q-layer.3": 1920.5116, "encoder_q-layer.4": 2055.7505, "encoder_q-layer.5": 2108.4351, "encoder_q-layer.6": 2187.1094, "encoder_q-layer.7": 2256.1304, "encoder_q-layer.8": 2685.7683, "encoder_q-layer.9": 2401.146, "epoch": 0.42, "inbatch_neg_score": 0.288, "inbatch_pos_score": 0.8662, "learning_rate": 1.9777777777777778e-05, "loss": 3.7859, "norm_diff": 0.1014, "norm_loss": 0.0, "num_token_doc": 66.7521, "num_token_overlap": 11.7101, "num_token_query": 31.4252, "num_token_union": 65.0543, "num_word_context": 202.2253, "num_word_doc": 49.8444, "num_word_query": 23.3501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4096.1377, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.3242, "queue_k_norm": 1.4233, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4252, "sent_len_1": 66.7521, "sent_len_max_0": 127.56, "sent_len_max_1": 188.9162, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7971, "doc_norm": 1.425, "encoder_q-embeddings": 3912.238, "encoder_q-layer.0": 2786.4639, "encoder_q-layer.1": 3121.7678, "encoder_q-layer.10": 2392.6277, "encoder_q-layer.11": 5923.5176, "encoder_q-layer.2": 3752.8059, "encoder_q-layer.3": 3957.7661, "encoder_q-layer.4": 4071.6577, "encoder_q-layer.5": 3948.2942, "encoder_q-layer.6": 3718.5076, "encoder_q-layer.7": 3255.9724, "encoder_q-layer.8": 2896.0681, "encoder_q-layer.9": 2307.7061, "epoch": 0.42, "inbatch_neg_score": 0.2905, "inbatch_pos_score": 0.8486, "learning_rate": 1.9722222222222224e-05, "loss": 3.7971, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 66.8982, "num_token_overlap": 11.6985, "num_token_query": 31.4899, "num_token_union": 65.2165, "num_word_context": 202.6483, "num_word_doc": 49.8808, "num_word_query": 23.3959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5476.0157, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.292, "query_norm": 1.3177, "queue_k_norm": 1.4234, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4899, "sent_len_1": 66.8982, "sent_len_max_0": 127.6475, "sent_len_max_1": 190.33, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.7868, "doc_norm": 1.4233, "encoder_q-embeddings": 2619.9382, "encoder_q-layer.0": 1735.0046, "encoder_q-layer.1": 1765.3707, "encoder_q-layer.10": 2409.6306, "encoder_q-layer.11": 5858.3057, "encoder_q-layer.2": 2057.9092, "encoder_q-layer.3": 2204.1851, "encoder_q-layer.4": 2220.9546, "encoder_q-layer.5": 2129.3516, "encoder_q-layer.6": 2238.3745, "encoder_q-layer.7": 2526.8901, "encoder_q-layer.8": 2539.8555, "encoder_q-layer.9": 2305.0706, "epoch": 0.42, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 0.8452, "learning_rate": 1.9666666666666666e-05, "loss": 3.7868, "norm_diff": 0.1074, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 11.7363, "num_token_query": 31.5164, "num_token_union": 65.1034, "num_word_context": 202.5905, "num_word_doc": 49.8052, "num_word_query": 23.4292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4151.3501, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.3159, "queue_k_norm": 1.4243, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5164, "sent_len_1": 66.7047, "sent_len_max_0": 127.5713, "sent_len_max_1": 186.3725, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7983, "doc_norm": 1.426, "encoder_q-embeddings": 4194.7822, "encoder_q-layer.0": 2996.2661, "encoder_q-layer.1": 3401.0901, "encoder_q-layer.10": 2458.5581, "encoder_q-layer.11": 5727.6221, "encoder_q-layer.2": 4221.9014, "encoder_q-layer.3": 4563.4907, "encoder_q-layer.4": 4316.0464, "encoder_q-layer.5": 3695.0527, "encoder_q-layer.6": 3252.4685, "encoder_q-layer.7": 3027.1096, "encoder_q-layer.8": 3192.3225, "encoder_q-layer.9": 2594.52, "epoch": 0.42, "inbatch_neg_score": 0.2941, "inbatch_pos_score": 0.8623, "learning_rate": 1.9611111111111115e-05, "loss": 3.7983, "norm_diff": 0.0881, "norm_loss": 0.0, "num_token_doc": 66.7463, "num_token_overlap": 11.7052, "num_token_query": 31.3429, "num_token_union": 65.0338, "num_word_context": 202.2748, "num_word_doc": 49.8219, "num_word_query": 23.2987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5774.8881, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2947, "query_norm": 1.3379, "queue_k_norm": 1.4257, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3429, "sent_len_1": 66.7463, "sent_len_max_0": 127.5438, "sent_len_max_1": 191.825, "stdk": 0.0477, "stdq": 0.0431, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7718, "doc_norm": 1.4209, "encoder_q-embeddings": 5576.4922, "encoder_q-layer.0": 3944.2354, "encoder_q-layer.1": 4480.0142, "encoder_q-layer.10": 2546.6721, "encoder_q-layer.11": 6102.2476, "encoder_q-layer.2": 5437.7598, "encoder_q-layer.3": 6257.2246, "encoder_q-layer.4": 6851.1753, "encoder_q-layer.5": 7913.3169, "encoder_q-layer.6": 7538.9556, "encoder_q-layer.7": 6907.6465, "encoder_q-layer.8": 6209.5645, "encoder_q-layer.9": 3127.7766, "epoch": 0.42, "inbatch_neg_score": 0.2996, "inbatch_pos_score": 0.855, "learning_rate": 1.9555555555555557e-05, "loss": 3.7718, "norm_diff": 0.0923, "norm_loss": 0.0, "num_token_doc": 67.0198, "num_token_overlap": 11.721, "num_token_query": 31.4246, "num_token_union": 65.2652, "num_word_context": 202.5145, "num_word_doc": 50.0125, "num_word_query": 23.3481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8718.1231, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2991, "query_norm": 1.3286, "queue_k_norm": 1.4236, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4246, "sent_len_1": 67.0198, "sent_len_max_0": 127.3812, "sent_len_max_1": 190.0788, "stdk": 0.0476, "stdq": 0.0425, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.7911, "doc_norm": 1.4144, "encoder_q-embeddings": 2388.927, "encoder_q-layer.0": 1579.7305, "encoder_q-layer.1": 1698.3943, "encoder_q-layer.10": 2503.2197, "encoder_q-layer.11": 6184.9507, "encoder_q-layer.2": 1948.0853, "encoder_q-layer.3": 1961.926, "encoder_q-layer.4": 1998.8948, "encoder_q-layer.5": 2057.2991, "encoder_q-layer.6": 2209.2334, "encoder_q-layer.7": 2311.5068, "encoder_q-layer.8": 2653.0679, "encoder_q-layer.9": 2407.3623, "epoch": 0.42, "inbatch_neg_score": 0.3077, "inbatch_pos_score": 0.855, "learning_rate": 1.9500000000000003e-05, "loss": 3.7911, "norm_diff": 0.0895, "norm_loss": 0.0, "num_token_doc": 66.839, "num_token_overlap": 11.6655, "num_token_query": 31.4228, "num_token_union": 65.1808, "num_word_context": 202.3514, "num_word_doc": 49.8692, "num_word_query": 23.35, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4148.9596, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3074, "query_norm": 1.3249, "queue_k_norm": 1.4251, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4228, "sent_len_1": 66.839, "sent_len_max_0": 127.4712, "sent_len_max_1": 190.7075, "stdk": 0.0473, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.7979, "doc_norm": 1.4194, "encoder_q-embeddings": 1977.8586, "encoder_q-layer.0": 1367.5824, "encoder_q-layer.1": 1446.6584, "encoder_q-layer.10": 2537.835, "encoder_q-layer.11": 6288.0352, "encoder_q-layer.2": 1589.1144, "encoder_q-layer.3": 1685.8517, "encoder_q-layer.4": 1748.0222, "encoder_q-layer.5": 1747.7694, "encoder_q-layer.6": 1888.3506, "encoder_q-layer.7": 2044.3796, "encoder_q-layer.8": 2403.5918, "encoder_q-layer.9": 2345.4817, "epoch": 0.42, "inbatch_neg_score": 0.3096, "inbatch_pos_score": 0.835, "learning_rate": 1.9444444444444445e-05, "loss": 3.7979, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.722, "num_token_overlap": 11.6656, "num_token_query": 31.2154, "num_token_union": 65.015, "num_word_context": 202.4657, "num_word_doc": 49.8151, "num_word_query": 23.1734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3906.3876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3086, "query_norm": 1.3136, "queue_k_norm": 1.4268, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2154, "sent_len_1": 66.722, "sent_len_max_0": 127.4025, "sent_len_max_1": 187.8812, "stdk": 0.0474, "stdq": 0.0417, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8019, "doc_norm": 1.4247, "encoder_q-embeddings": 4513.2788, "encoder_q-layer.0": 3132.9604, "encoder_q-layer.1": 3773.9805, "encoder_q-layer.10": 2474.7466, "encoder_q-layer.11": 6135.5137, "encoder_q-layer.2": 4271.4458, "encoder_q-layer.3": 4631.9692, "encoder_q-layer.4": 4805.5342, "encoder_q-layer.5": 4305.6074, "encoder_q-layer.6": 3938.6838, "encoder_q-layer.7": 3568.0117, "encoder_q-layer.8": 2806.291, "encoder_q-layer.9": 2441.7375, "epoch": 0.42, "inbatch_neg_score": 0.3072, "inbatch_pos_score": 0.853, "learning_rate": 1.938888888888889e-05, "loss": 3.8019, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.8176, "num_token_overlap": 11.6778, "num_token_query": 31.5097, "num_token_union": 65.2146, "num_word_context": 202.3823, "num_word_doc": 49.845, "num_word_query": 23.4138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6147.4566, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3069, "query_norm": 1.3208, "queue_k_norm": 1.4257, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5097, "sent_len_1": 66.8176, "sent_len_max_0": 127.52, "sent_len_max_1": 189.6113, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7777, "doc_norm": 1.4207, "encoder_q-embeddings": 3887.1423, "encoder_q-layer.0": 2783.6226, "encoder_q-layer.1": 2920.9963, "encoder_q-layer.10": 2456.6353, "encoder_q-layer.11": 5923.9839, "encoder_q-layer.2": 3371.9958, "encoder_q-layer.3": 3597.3816, "encoder_q-layer.4": 3465.8462, "encoder_q-layer.5": 3514.5249, "encoder_q-layer.6": 3409.1318, "encoder_q-layer.7": 2666.3403, "encoder_q-layer.8": 2741.5269, "encoder_q-layer.9": 2380.4937, "epoch": 0.42, "inbatch_neg_score": 0.3076, "inbatch_pos_score": 0.8604, "learning_rate": 1.9333333333333333e-05, "loss": 3.7777, "norm_diff": 0.0945, "norm_loss": 0.0, "num_token_doc": 66.8185, "num_token_overlap": 11.7261, "num_token_query": 31.4814, "num_token_union": 65.1578, "num_word_context": 202.2716, "num_word_doc": 49.8296, "num_word_query": 23.3887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5220.1978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3076, "query_norm": 1.3261, "queue_k_norm": 1.4276, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4814, "sent_len_1": 66.8185, "sent_len_max_0": 127.4137, "sent_len_max_1": 190.7413, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.796, "doc_norm": 1.4276, "encoder_q-embeddings": 4084.9048, "encoder_q-layer.0": 2824.2012, "encoder_q-layer.1": 3221.5332, "encoder_q-layer.10": 2420.9062, "encoder_q-layer.11": 6339.165, "encoder_q-layer.2": 3906.7317, "encoder_q-layer.3": 4013.822, "encoder_q-layer.4": 4396.314, "encoder_q-layer.5": 4361.6152, "encoder_q-layer.6": 4318.3379, "encoder_q-layer.7": 4154.6987, "encoder_q-layer.8": 3481.3188, "encoder_q-layer.9": 2495.2964, "epoch": 0.43, "inbatch_neg_score": 0.3107, "inbatch_pos_score": 0.8823, "learning_rate": 1.927777777777778e-05, "loss": 3.796, "norm_diff": 0.0852, "norm_loss": 0.0, "num_token_doc": 66.8358, "num_token_overlap": 11.6834, "num_token_query": 31.393, "num_token_union": 65.1095, "num_word_context": 202.1297, "num_word_doc": 49.8549, "num_word_query": 23.3185, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5966.6002, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3101, "query_norm": 1.3424, "queue_k_norm": 1.4282, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.393, "sent_len_1": 66.8358, "sent_len_max_0": 127.4562, "sent_len_max_1": 189.7163, "stdk": 0.0477, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7942, "doc_norm": 1.4286, "encoder_q-embeddings": 2442.5325, "encoder_q-layer.0": 1692.8599, "encoder_q-layer.1": 1813.3064, "encoder_q-layer.10": 2345.3943, "encoder_q-layer.11": 6116.4438, "encoder_q-layer.2": 2138.2402, "encoder_q-layer.3": 2205.644, "encoder_q-layer.4": 2325.1367, "encoder_q-layer.5": 2253.3523, "encoder_q-layer.6": 2164.8416, "encoder_q-layer.7": 2288.8569, "encoder_q-layer.8": 2619.6509, "encoder_q-layer.9": 2330.4167, "epoch": 0.43, "inbatch_neg_score": 0.3116, "inbatch_pos_score": 0.8662, "learning_rate": 1.922222222222222e-05, "loss": 3.7942, "norm_diff": 0.1222, "norm_loss": 0.0, "num_token_doc": 66.6904, "num_token_overlap": 11.6913, "num_token_query": 31.487, "num_token_union": 65.0945, "num_word_context": 202.2046, "num_word_doc": 49.7931, "num_word_query": 23.3858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4126.3719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3108, "query_norm": 1.3064, "queue_k_norm": 1.4297, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.487, "sent_len_1": 66.6904, "sent_len_max_0": 127.5662, "sent_len_max_1": 191.3113, "stdk": 0.0477, "stdq": 0.0417, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7905, "doc_norm": 1.4289, "encoder_q-embeddings": 4298.001, "encoder_q-layer.0": 3365.1514, "encoder_q-layer.1": 3372.0122, "encoder_q-layer.10": 1228.6307, "encoder_q-layer.11": 3102.0679, "encoder_q-layer.2": 4025.9304, "encoder_q-layer.3": 4054.2791, "encoder_q-layer.4": 4460.6714, "encoder_q-layer.5": 3505.9707, "encoder_q-layer.6": 2878.3613, "encoder_q-layer.7": 2561.2183, "encoder_q-layer.8": 1987.739, "encoder_q-layer.9": 1339.4158, "epoch": 0.43, "inbatch_neg_score": 0.3084, "inbatch_pos_score": 0.8638, "learning_rate": 1.9166666666666667e-05, "loss": 3.7905, "norm_diff": 0.1196, "norm_loss": 0.0, "num_token_doc": 66.7709, "num_token_overlap": 11.6889, "num_token_query": 31.4537, "num_token_union": 65.1922, "num_word_context": 202.3532, "num_word_doc": 49.8692, "num_word_query": 23.3728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4940.6228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3091, "query_norm": 1.3093, "queue_k_norm": 1.427, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4537, "sent_len_1": 66.7709, "sent_len_max_0": 127.515, "sent_len_max_1": 189.9525, "stdk": 0.0477, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.8016, "doc_norm": 1.4307, "encoder_q-embeddings": 1074.5525, "encoder_q-layer.0": 736.3185, "encoder_q-layer.1": 757.0916, "encoder_q-layer.10": 1211.2903, "encoder_q-layer.11": 3032.4739, "encoder_q-layer.2": 836.4974, "encoder_q-layer.3": 875.507, "encoder_q-layer.4": 914.1842, "encoder_q-layer.5": 893.8917, "encoder_q-layer.6": 955.8259, "encoder_q-layer.7": 1062.4508, "encoder_q-layer.8": 1181.0208, "encoder_q-layer.9": 1094.4731, "epoch": 0.43, "inbatch_neg_score": 0.3132, "inbatch_pos_score": 0.874, "learning_rate": 1.9111111111111113e-05, "loss": 3.8016, "norm_diff": 0.1183, "norm_loss": 0.0, "num_token_doc": 66.8781, "num_token_overlap": 11.6648, "num_token_query": 31.3477, "num_token_union": 65.1861, "num_word_context": 202.1661, "num_word_doc": 49.9124, "num_word_query": 23.2872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1965.2454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.314, "query_norm": 1.3124, "queue_k_norm": 1.4301, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3477, "sent_len_1": 66.8781, "sent_len_max_0": 127.4013, "sent_len_max_1": 189.8562, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7749, "doc_norm": 1.4329, "encoder_q-embeddings": 1959.2177, "encoder_q-layer.0": 1404.8179, "encoder_q-layer.1": 1568.613, "encoder_q-layer.10": 1269.0931, "encoder_q-layer.11": 3155.5657, "encoder_q-layer.2": 1832.1863, "encoder_q-layer.3": 1895.0164, "encoder_q-layer.4": 1970.0702, "encoder_q-layer.5": 1848.8951, "encoder_q-layer.6": 1643.8438, "encoder_q-layer.7": 1592.3757, "encoder_q-layer.8": 1541.5739, "encoder_q-layer.9": 1271.4758, "epoch": 0.43, "inbatch_neg_score": 0.3078, "inbatch_pos_score": 0.876, "learning_rate": 1.905555555555556e-05, "loss": 3.7749, "norm_diff": 0.0979, "norm_loss": 0.0, "num_token_doc": 66.842, "num_token_overlap": 11.6861, "num_token_query": 31.379, "num_token_union": 65.1127, "num_word_context": 202.4485, "num_word_doc": 49.8645, "num_word_query": 23.2788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2755.8052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3083, "query_norm": 1.335, "queue_k_norm": 1.4305, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.379, "sent_len_1": 66.842, "sent_len_max_0": 127.3175, "sent_len_max_1": 190.9825, "stdk": 0.0479, "stdq": 0.043, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7826, "doc_norm": 1.4357, "encoder_q-embeddings": 591.1797, "encoder_q-layer.0": 386.3674, "encoder_q-layer.1": 418.4172, "encoder_q-layer.10": 626.0778, "encoder_q-layer.11": 1591.8168, "encoder_q-layer.2": 504.3302, "encoder_q-layer.3": 512.6789, "encoder_q-layer.4": 544.4678, "encoder_q-layer.5": 537.5845, "encoder_q-layer.6": 562.6879, "encoder_q-layer.7": 602.0118, "encoder_q-layer.8": 685.5323, "encoder_q-layer.9": 615.2382, "epoch": 0.43, "inbatch_neg_score": 0.3126, "inbatch_pos_score": 0.877, "learning_rate": 1.9e-05, "loss": 3.7826, "norm_diff": 0.1185, "norm_loss": 0.0, "num_token_doc": 66.877, "num_token_overlap": 11.652, "num_token_query": 31.2725, "num_token_union": 65.1381, "num_word_context": 202.3002, "num_word_doc": 49.9179, "num_word_query": 23.2176, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1066.7649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.312, "query_norm": 1.3173, "queue_k_norm": 1.4321, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2725, "sent_len_1": 66.877, "sent_len_max_0": 127.4425, "sent_len_max_1": 189.7713, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.7919, "doc_norm": 1.4284, "encoder_q-embeddings": 1524.9628, "encoder_q-layer.0": 1101.9043, "encoder_q-layer.1": 1271.5867, "encoder_q-layer.10": 591.0243, "encoder_q-layer.11": 1506.6981, "encoder_q-layer.2": 1400.2463, "encoder_q-layer.3": 1361.0782, "encoder_q-layer.4": 1371.646, "encoder_q-layer.5": 1556.5358, "encoder_q-layer.6": 1414.9066, "encoder_q-layer.7": 1271.5355, "encoder_q-layer.8": 962.4747, "encoder_q-layer.9": 599.6173, "epoch": 0.43, "inbatch_neg_score": 0.3107, "inbatch_pos_score": 0.8711, "learning_rate": 1.8944444444444447e-05, "loss": 3.7919, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.7781, "num_token_overlap": 11.6703, "num_token_query": 31.4249, "num_token_union": 65.1224, "num_word_context": 201.9576, "num_word_doc": 49.7731, "num_word_query": 23.3343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1890.4109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3113, "query_norm": 1.3031, "queue_k_norm": 1.4314, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4249, "sent_len_1": 66.7781, "sent_len_max_0": 127.5512, "sent_len_max_1": 192.5838, "stdk": 0.0477, "stdq": 0.0416, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7937, "doc_norm": 1.4358, "encoder_q-embeddings": 611.0062, "encoder_q-layer.0": 420.9739, "encoder_q-layer.1": 453.6769, "encoder_q-layer.10": 629.9706, "encoder_q-layer.11": 1576.2145, "encoder_q-layer.2": 521.9258, "encoder_q-layer.3": 558.0474, "encoder_q-layer.4": 572.1591, "encoder_q-layer.5": 554.1958, "encoder_q-layer.6": 588.4081, "encoder_q-layer.7": 600.8433, "encoder_q-layer.8": 684.7588, "encoder_q-layer.9": 616.3866, "epoch": 0.43, "inbatch_neg_score": 0.313, "inbatch_pos_score": 0.8691, "learning_rate": 1.888888888888889e-05, "loss": 3.7937, "norm_diff": 0.1155, "norm_loss": 0.0, "num_token_doc": 66.6764, "num_token_overlap": 11.6229, "num_token_query": 31.2488, "num_token_union": 65.0784, "num_word_context": 202.2533, "num_word_doc": 49.7471, "num_word_query": 23.2106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1068.983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3137, "query_norm": 1.3203, "queue_k_norm": 1.431, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2488, "sent_len_1": 66.6764, "sent_len_max_0": 127.4387, "sent_len_max_1": 189.0687, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7899, "doc_norm": 1.4326, "encoder_q-embeddings": 553.9243, "encoder_q-layer.0": 361.0567, "encoder_q-layer.1": 382.7668, "encoder_q-layer.10": 639.4095, "encoder_q-layer.11": 1544.1133, "encoder_q-layer.2": 438.2292, "encoder_q-layer.3": 447.0399, "encoder_q-layer.4": 439.3288, "encoder_q-layer.5": 450.1238, "encoder_q-layer.6": 497.4225, "encoder_q-layer.7": 541.4139, "encoder_q-layer.8": 585.7449, "encoder_q-layer.9": 572.5077, "epoch": 0.43, "inbatch_neg_score": 0.321, "inbatch_pos_score": 0.8921, "learning_rate": 1.8833333333333335e-05, "loss": 3.7899, "norm_diff": 0.1048, "norm_loss": 0.0, "num_token_doc": 66.6729, "num_token_overlap": 11.6622, "num_token_query": 31.3304, "num_token_union": 65.0177, "num_word_context": 201.933, "num_word_doc": 49.7508, "num_word_query": 23.2835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 991.7751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3198, "query_norm": 1.3278, "queue_k_norm": 1.4333, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3304, "sent_len_1": 66.6729, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.05, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.7712, "doc_norm": 1.4297, "encoder_q-embeddings": 724.8472, "encoder_q-layer.0": 464.4601, "encoder_q-layer.1": 502.2085, "encoder_q-layer.10": 610.7396, "encoder_q-layer.11": 1558.0913, "encoder_q-layer.2": 581.8558, "encoder_q-layer.3": 614.4023, "encoder_q-layer.4": 612.6625, "encoder_q-layer.5": 639.2635, "encoder_q-layer.6": 629.0202, "encoder_q-layer.7": 670.4008, "encoder_q-layer.8": 698.2922, "encoder_q-layer.9": 595.8771, "epoch": 0.43, "inbatch_neg_score": 0.3188, "inbatch_pos_score": 0.8853, "learning_rate": 1.8777777777777777e-05, "loss": 3.7712, "norm_diff": 0.0956, "norm_loss": 0.0, "num_token_doc": 66.8246, "num_token_overlap": 11.7043, "num_token_query": 31.3606, "num_token_union": 65.1043, "num_word_context": 202.5595, "num_word_doc": 49.8064, "num_word_query": 23.2844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1120.9729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3184, "query_norm": 1.3341, "queue_k_norm": 1.4319, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3606, "sent_len_1": 66.8246, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.9125, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7796, "doc_norm": 1.4319, "encoder_q-embeddings": 608.9583, "encoder_q-layer.0": 413.2889, "encoder_q-layer.1": 441.7531, "encoder_q-layer.10": 681.0013, "encoder_q-layer.11": 1562.2654, "encoder_q-layer.2": 501.8822, "encoder_q-layer.3": 501.3599, "encoder_q-layer.4": 509.2948, "encoder_q-layer.5": 490.9963, "encoder_q-layer.6": 505.6584, "encoder_q-layer.7": 585.7184, "encoder_q-layer.8": 645.4169, "encoder_q-layer.9": 575.9976, "epoch": 0.43, "inbatch_neg_score": 0.321, "inbatch_pos_score": 0.8784, "learning_rate": 1.8722222222222223e-05, "loss": 3.7796, "norm_diff": 0.0985, "norm_loss": 0.0, "num_token_doc": 66.9124, "num_token_overlap": 11.7031, "num_token_query": 31.3966, "num_token_union": 65.2016, "num_word_context": 202.6045, "num_word_doc": 49.9822, "num_word_query": 23.3364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1049.5507, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3201, "query_norm": 1.3335, "queue_k_norm": 1.4353, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3966, "sent_len_1": 66.9124, "sent_len_max_0": 127.5275, "sent_len_max_1": 190.0913, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7822, "doc_norm": 1.4405, "encoder_q-embeddings": 619.5431, "encoder_q-layer.0": 412.9048, "encoder_q-layer.1": 464.816, "encoder_q-layer.10": 618.98, "encoder_q-layer.11": 1558.7539, "encoder_q-layer.2": 507.8284, "encoder_q-layer.3": 524.729, "encoder_q-layer.4": 568.1287, "encoder_q-layer.5": 590.7874, "encoder_q-layer.6": 614.4725, "encoder_q-layer.7": 581.7821, "encoder_q-layer.8": 617.8312, "encoder_q-layer.9": 577.0054, "epoch": 0.43, "inbatch_neg_score": 0.3214, "inbatch_pos_score": 0.8726, "learning_rate": 1.866666666666667e-05, "loss": 3.7822, "norm_diff": 0.1189, "norm_loss": 0.0, "num_token_doc": 66.8096, "num_token_overlap": 11.7472, "num_token_query": 31.5408, "num_token_union": 65.1604, "num_word_context": 202.1768, "num_word_doc": 49.8679, "num_word_query": 23.4551, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.2844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.3216, "queue_k_norm": 1.4352, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5408, "sent_len_1": 66.8096, "sent_len_max_0": 127.5012, "sent_len_max_1": 188.2512, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7663, "doc_norm": 1.4275, "encoder_q-embeddings": 589.1292, "encoder_q-layer.0": 381.239, "encoder_q-layer.1": 407.5798, "encoder_q-layer.10": 663.3187, "encoder_q-layer.11": 1569.2297, "encoder_q-layer.2": 446.5315, "encoder_q-layer.3": 470.9703, "encoder_q-layer.4": 492.8135, "encoder_q-layer.5": 506.0142, "encoder_q-layer.6": 547.7007, "encoder_q-layer.7": 586.4628, "encoder_q-layer.8": 720.3773, "encoder_q-layer.9": 641.8994, "epoch": 0.43, "inbatch_neg_score": 0.3227, "inbatch_pos_score": 0.8833, "learning_rate": 1.861111111111111e-05, "loss": 3.7663, "norm_diff": 0.0898, "norm_loss": 0.0, "num_token_doc": 66.633, "num_token_overlap": 11.6621, "num_token_query": 31.3642, "num_token_union": 65.0368, "num_word_context": 202.098, "num_word_doc": 49.6915, "num_word_query": 23.295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1029.6391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.322, "query_norm": 1.3377, "queue_k_norm": 1.4326, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3642, "sent_len_1": 66.633, "sent_len_max_0": 127.43, "sent_len_max_1": 191.7413, "stdk": 0.0475, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7819, "doc_norm": 1.4335, "encoder_q-embeddings": 577.9241, "encoder_q-layer.0": 375.8549, "encoder_q-layer.1": 397.9862, "encoder_q-layer.10": 678.1542, "encoder_q-layer.11": 1582.3827, "encoder_q-layer.2": 463.5479, "encoder_q-layer.3": 466.5142, "encoder_q-layer.4": 467.4808, "encoder_q-layer.5": 477.6886, "encoder_q-layer.6": 518.0582, "encoder_q-layer.7": 560.0804, "encoder_q-layer.8": 652.1218, "encoder_q-layer.9": 596.5922, "epoch": 0.43, "inbatch_neg_score": 0.3222, "inbatch_pos_score": 0.8843, "learning_rate": 1.8555555555555557e-05, "loss": 3.7819, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.9206, "num_token_overlap": 11.7006, "num_token_query": 31.4635, "num_token_union": 65.2371, "num_word_context": 202.5658, "num_word_doc": 49.9175, "num_word_query": 23.3877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1012.8801, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.322, "query_norm": 1.3526, "queue_k_norm": 1.4346, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4635, "sent_len_1": 66.9206, "sent_len_max_0": 127.5212, "sent_len_max_1": 190.0213, "stdk": 0.0477, "stdq": 0.0431, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7892, "doc_norm": 1.4424, "encoder_q-embeddings": 622.8158, "encoder_q-layer.0": 414.0498, "encoder_q-layer.1": 436.3832, "encoder_q-layer.10": 624.4155, "encoder_q-layer.11": 1546.2672, "encoder_q-layer.2": 486.4743, "encoder_q-layer.3": 514.4981, "encoder_q-layer.4": 539.165, "encoder_q-layer.5": 533.54, "encoder_q-layer.6": 567.9048, "encoder_q-layer.7": 633.9778, "encoder_q-layer.8": 679.4537, "encoder_q-layer.9": 605.1803, "epoch": 0.43, "inbatch_neg_score": 0.3177, "inbatch_pos_score": 0.8862, "learning_rate": 1.85e-05, "loss": 3.7892, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.9108, "num_token_overlap": 11.6768, "num_token_query": 31.3554, "num_token_union": 65.1879, "num_word_context": 202.4141, "num_word_doc": 49.9075, "num_word_query": 23.2927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1052.3671, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3184, "query_norm": 1.3372, "queue_k_norm": 1.4364, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3554, "sent_len_1": 66.9108, "sent_len_max_0": 127.4163, "sent_len_max_1": 191.5488, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7788, "doc_norm": 1.4375, "encoder_q-embeddings": 693.942, "encoder_q-layer.0": 472.5027, "encoder_q-layer.1": 513.7672, "encoder_q-layer.10": 608.2808, "encoder_q-layer.11": 1497.3109, "encoder_q-layer.2": 582.6061, "encoder_q-layer.3": 605.8447, "encoder_q-layer.4": 685.4717, "encoder_q-layer.5": 688.8071, "encoder_q-layer.6": 644.1934, "encoder_q-layer.7": 645.2528, "encoder_q-layer.8": 694.2004, "encoder_q-layer.9": 601.7849, "epoch": 0.43, "inbatch_neg_score": 0.324, "inbatch_pos_score": 0.8955, "learning_rate": 1.8444444444444445e-05, "loss": 3.7788, "norm_diff": 0.0895, "norm_loss": 0.0, "num_token_doc": 66.7395, "num_token_overlap": 11.6718, "num_token_query": 31.3087, "num_token_union": 65.0953, "num_word_context": 201.9175, "num_word_doc": 49.8059, "num_word_query": 23.249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1111.7366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.324, "query_norm": 1.348, "queue_k_norm": 1.436, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3087, "sent_len_1": 66.7395, "sent_len_max_0": 127.2188, "sent_len_max_1": 187.3487, "stdk": 0.0478, "stdq": 0.0429, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.7567, "doc_norm": 1.4327, "encoder_q-embeddings": 1313.8733, "encoder_q-layer.0": 919.6564, "encoder_q-layer.1": 1013.6407, "encoder_q-layer.10": 730.6529, "encoder_q-layer.11": 1660.056, "encoder_q-layer.2": 1173.5791, "encoder_q-layer.3": 1131.5791, "encoder_q-layer.4": 1114.8849, "encoder_q-layer.5": 948.9991, "encoder_q-layer.6": 864.3005, "encoder_q-layer.7": 735.4213, "encoder_q-layer.8": 784.3541, "encoder_q-layer.9": 641.267, "epoch": 0.44, "inbatch_neg_score": 0.3318, "inbatch_pos_score": 0.8711, "learning_rate": 1.838888888888889e-05, "loss": 3.7567, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.9178, "num_token_overlap": 11.7266, "num_token_query": 31.5227, "num_token_union": 65.2631, "num_word_context": 202.3397, "num_word_doc": 49.9297, "num_word_query": 23.4062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1582.6017, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3298, "query_norm": 1.336, "queue_k_norm": 1.4375, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5227, "sent_len_1": 66.9178, "sent_len_max_0": 127.2613, "sent_len_max_1": 188.605, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.788, "doc_norm": 1.439, "encoder_q-embeddings": 653.7365, "encoder_q-layer.0": 425.4805, "encoder_q-layer.1": 480.2541, "encoder_q-layer.10": 593.3752, "encoder_q-layer.11": 1596.1432, "encoder_q-layer.2": 509.0391, "encoder_q-layer.3": 520.7272, "encoder_q-layer.4": 544.1656, "encoder_q-layer.5": 530.1971, "encoder_q-layer.6": 551.8284, "encoder_q-layer.7": 561.0588, "encoder_q-layer.8": 633.3314, "encoder_q-layer.9": 577.1318, "epoch": 0.44, "inbatch_neg_score": 0.3278, "inbatch_pos_score": 0.8989, "learning_rate": 1.8333333333333333e-05, "loss": 3.788, "norm_diff": 0.0992, "norm_loss": 0.0, "num_token_doc": 66.7445, "num_token_overlap": 11.6967, "num_token_query": 31.4414, "num_token_union": 65.08, "num_word_context": 202.2269, "num_word_doc": 49.811, "num_word_query": 23.3682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1072.0942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3281, "query_norm": 1.3399, "queue_k_norm": 1.4358, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4414, "sent_len_1": 66.7445, "sent_len_max_0": 127.4213, "sent_len_max_1": 189.6825, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7926, "doc_norm": 1.439, "encoder_q-embeddings": 759.6042, "encoder_q-layer.0": 541.9131, "encoder_q-layer.1": 575.1046, "encoder_q-layer.10": 618.0851, "encoder_q-layer.11": 1443.0414, "encoder_q-layer.2": 658.6043, "encoder_q-layer.3": 639.0793, "encoder_q-layer.4": 680.0249, "encoder_q-layer.5": 699.8286, "encoder_q-layer.6": 717.5377, "encoder_q-layer.7": 714.2068, "encoder_q-layer.8": 721.2942, "encoder_q-layer.9": 590.6748, "epoch": 0.44, "inbatch_neg_score": 0.3304, "inbatch_pos_score": 0.916, "learning_rate": 1.827777777777778e-05, "loss": 3.7926, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.6447, "num_token_overlap": 11.6328, "num_token_query": 31.2775, "num_token_union": 64.9784, "num_word_context": 201.831, "num_word_doc": 49.728, "num_word_query": 23.1972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1133.5646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.3647, "queue_k_norm": 1.4362, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2775, "sent_len_1": 66.6447, "sent_len_max_0": 127.3312, "sent_len_max_1": 190.0588, "stdk": 0.0479, "stdq": 0.0434, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7681, "doc_norm": 1.4355, "encoder_q-embeddings": 910.1569, "encoder_q-layer.0": 619.5993, "encoder_q-layer.1": 679.8939, "encoder_q-layer.10": 597.9792, "encoder_q-layer.11": 1442.5109, "encoder_q-layer.2": 785.2292, "encoder_q-layer.3": 804.936, "encoder_q-layer.4": 907.8552, "encoder_q-layer.5": 932.2751, "encoder_q-layer.6": 925.3325, "encoder_q-layer.7": 800.8258, "encoder_q-layer.8": 737.916, "encoder_q-layer.9": 586.5601, "epoch": 0.44, "inbatch_neg_score": 0.3318, "inbatch_pos_score": 0.8979, "learning_rate": 1.8222222222222224e-05, "loss": 3.7681, "norm_diff": 0.0913, "norm_loss": 0.0, "num_token_doc": 66.452, "num_token_overlap": 11.6639, "num_token_query": 31.4029, "num_token_union": 64.951, "num_word_context": 202.0605, "num_word_doc": 49.5924, "num_word_query": 23.3104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1288.7745, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3323, "query_norm": 1.3442, "queue_k_norm": 1.4359, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4029, "sent_len_1": 66.452, "sent_len_max_0": 127.6075, "sent_len_max_1": 188.4125, "stdk": 0.0477, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7747, "doc_norm": 1.4376, "encoder_q-embeddings": 888.3074, "encoder_q-layer.0": 701.2869, "encoder_q-layer.1": 753.0715, "encoder_q-layer.10": 629.5804, "encoder_q-layer.11": 1486.8811, "encoder_q-layer.2": 914.3802, "encoder_q-layer.3": 967.6661, "encoder_q-layer.4": 966.8978, "encoder_q-layer.5": 1015.4671, "encoder_q-layer.6": 1052.1096, "encoder_q-layer.7": 1015.5161, "encoder_q-layer.8": 816.4224, "encoder_q-layer.9": 585.3972, "epoch": 0.44, "inbatch_neg_score": 0.3353, "inbatch_pos_score": 0.9004, "learning_rate": 1.8166666666666667e-05, "loss": 3.7747, "norm_diff": 0.0882, "norm_loss": 0.0, "num_token_doc": 66.6717, "num_token_overlap": 11.7123, "num_token_query": 31.4865, "num_token_union": 65.1409, "num_word_context": 202.2978, "num_word_doc": 49.7451, "num_word_query": 23.4041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1396.8371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3357, "query_norm": 1.3494, "queue_k_norm": 1.4372, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4865, "sent_len_1": 66.6717, "sent_len_max_0": 127.6688, "sent_len_max_1": 191.37, "stdk": 0.0478, "stdq": 0.0427, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.808, "doc_norm": 1.443, "encoder_q-embeddings": 769.9751, "encoder_q-layer.0": 581.8358, "encoder_q-layer.1": 592.8065, "encoder_q-layer.10": 675.5594, "encoder_q-layer.11": 1681.6378, "encoder_q-layer.2": 676.3718, "encoder_q-layer.3": 693.0342, "encoder_q-layer.4": 749.4255, "encoder_q-layer.5": 678.8643, "encoder_q-layer.6": 694.1469, "encoder_q-layer.7": 709.5322, "encoder_q-layer.8": 770.4908, "encoder_q-layer.9": 645.1747, "epoch": 0.44, "inbatch_neg_score": 0.3385, "inbatch_pos_score": 0.8965, "learning_rate": 1.8111111111111112e-05, "loss": 3.808, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.574, "num_token_overlap": 11.6755, "num_token_query": 31.4067, "num_token_union": 64.9574, "num_word_context": 202.1641, "num_word_doc": 49.6678, "num_word_query": 23.3216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1215.2405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3394, "query_norm": 1.3353, "queue_k_norm": 1.4394, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4067, "sent_len_1": 66.574, "sent_len_max_0": 127.55, "sent_len_max_1": 190.1987, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.786, "doc_norm": 1.4389, "encoder_q-embeddings": 514.6365, "encoder_q-layer.0": 332.8228, "encoder_q-layer.1": 342.177, "encoder_q-layer.10": 753.8237, "encoder_q-layer.11": 1622.0984, "encoder_q-layer.2": 390.093, "encoder_q-layer.3": 399.5429, "encoder_q-layer.4": 424.8288, "encoder_q-layer.5": 423.0734, "encoder_q-layer.6": 470.1974, "encoder_q-layer.7": 534.3787, "encoder_q-layer.8": 659.6602, "encoder_q-layer.9": 611.6125, "epoch": 0.44, "inbatch_neg_score": 0.342, "inbatch_pos_score": 0.9062, "learning_rate": 1.8055555555555555e-05, "loss": 3.786, "norm_diff": 0.1103, "norm_loss": 0.0, "num_token_doc": 66.8128, "num_token_overlap": 11.6674, "num_token_query": 31.3547, "num_token_union": 65.0633, "num_word_context": 202.4011, "num_word_doc": 49.8453, "num_word_query": 23.2696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 996.3792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.342, "query_norm": 1.3286, "queue_k_norm": 1.4395, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3547, "sent_len_1": 66.8128, "sent_len_max_0": 127.5162, "sent_len_max_1": 193.115, "stdk": 0.0478, "stdq": 0.0418, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7734, "doc_norm": 1.4392, "encoder_q-embeddings": 606.1328, "encoder_q-layer.0": 394.9229, "encoder_q-layer.1": 419.5011, "encoder_q-layer.10": 656.9011, "encoder_q-layer.11": 1559.6052, "encoder_q-layer.2": 498.0621, "encoder_q-layer.3": 506.853, "encoder_q-layer.4": 497.2727, "encoder_q-layer.5": 496.6962, "encoder_q-layer.6": 500.5232, "encoder_q-layer.7": 528.551, "encoder_q-layer.8": 596.2918, "encoder_q-layer.9": 600.5563, "epoch": 0.44, "inbatch_neg_score": 0.3437, "inbatch_pos_score": 0.9058, "learning_rate": 1.8e-05, "loss": 3.7734, "norm_diff": 0.1015, "norm_loss": 0.0, "num_token_doc": 66.814, "num_token_overlap": 11.673, "num_token_query": 31.3564, "num_token_union": 65.1387, "num_word_context": 202.3414, "num_word_doc": 49.8506, "num_word_query": 23.276, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1001.3578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3445, "query_norm": 1.3377, "queue_k_norm": 1.4426, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3564, "sent_len_1": 66.814, "sent_len_max_0": 127.4237, "sent_len_max_1": 188.7388, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7901, "doc_norm": 1.4368, "encoder_q-embeddings": 657.4673, "encoder_q-layer.0": 432.3316, "encoder_q-layer.1": 463.162, "encoder_q-layer.10": 613.4667, "encoder_q-layer.11": 1523.7877, "encoder_q-layer.2": 527.4144, "encoder_q-layer.3": 554.6826, "encoder_q-layer.4": 605.6689, "encoder_q-layer.5": 609.6795, "encoder_q-layer.6": 658.8314, "encoder_q-layer.7": 675.8765, "encoder_q-layer.8": 740.5532, "encoder_q-layer.9": 590.4832, "epoch": 0.44, "inbatch_neg_score": 0.3444, "inbatch_pos_score": 0.8979, "learning_rate": 1.7944444444444443e-05, "loss": 3.7901, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 66.8015, "num_token_overlap": 11.6451, "num_token_query": 31.3493, "num_token_union": 65.1415, "num_word_context": 202.5779, "num_word_doc": 49.8332, "num_word_query": 23.3007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1102.2368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3445, "query_norm": 1.3264, "queue_k_norm": 1.4416, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3493, "sent_len_1": 66.8015, "sent_len_max_0": 127.455, "sent_len_max_1": 190.1838, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7466, "doc_norm": 1.4479, "encoder_q-embeddings": 1981.9447, "encoder_q-layer.0": 1337.7875, "encoder_q-layer.1": 1567.0963, "encoder_q-layer.10": 1365.1614, "encoder_q-layer.11": 3141.9399, "encoder_q-layer.2": 1932.1746, "encoder_q-layer.3": 2051.0122, "encoder_q-layer.4": 2178.9404, "encoder_q-layer.5": 2029.5974, "encoder_q-layer.6": 1845.7605, "encoder_q-layer.7": 1677.8593, "encoder_q-layer.8": 1545.804, "encoder_q-layer.9": 1290.1536, "epoch": 0.44, "inbatch_neg_score": 0.3429, "inbatch_pos_score": 0.9209, "learning_rate": 1.788888888888889e-05, "loss": 3.7466, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.8496, "num_token_overlap": 11.6867, "num_token_query": 31.3763, "num_token_union": 65.1525, "num_word_context": 201.944, "num_word_doc": 49.8917, "num_word_query": 23.3058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2866.1031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3433, "query_norm": 1.3366, "queue_k_norm": 1.4399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3763, "sent_len_1": 66.8496, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.6813, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.7816, "doc_norm": 1.4355, "encoder_q-embeddings": 1231.8911, "encoder_q-layer.0": 843.7772, "encoder_q-layer.1": 950.338, "encoder_q-layer.10": 1218.7148, "encoder_q-layer.11": 3155.4792, "encoder_q-layer.2": 1046.4724, "encoder_q-layer.3": 1074.2802, "encoder_q-layer.4": 1061.267, "encoder_q-layer.5": 1074.0137, "encoder_q-layer.6": 1111.4535, "encoder_q-layer.7": 1104.8665, "encoder_q-layer.8": 1223.0546, "encoder_q-layer.9": 1156.265, "epoch": 0.44, "inbatch_neg_score": 0.3466, "inbatch_pos_score": 0.9014, "learning_rate": 1.7833333333333334e-05, "loss": 3.7816, "norm_diff": 0.098, "norm_loss": 0.0, "num_token_doc": 66.9458, "num_token_overlap": 11.7123, "num_token_query": 31.4596, "num_token_union": 65.2643, "num_word_context": 202.3296, "num_word_doc": 49.9368, "num_word_query": 23.3806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.2943, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3459, "query_norm": 1.3375, "queue_k_norm": 1.443, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4596, "sent_len_1": 66.9458, "sent_len_max_0": 127.4013, "sent_len_max_1": 189.43, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.7755, "doc_norm": 1.4383, "encoder_q-embeddings": 1210.5431, "encoder_q-layer.0": 830.4134, "encoder_q-layer.1": 896.9332, "encoder_q-layer.10": 1297.8663, "encoder_q-layer.11": 3306.0676, "encoder_q-layer.2": 988.8925, "encoder_q-layer.3": 1048.3472, "encoder_q-layer.4": 1055.6887, "encoder_q-layer.5": 1111.3236, "encoder_q-layer.6": 1194.1338, "encoder_q-layer.7": 1234.1559, "encoder_q-layer.8": 1323.3751, "encoder_q-layer.9": 1184.2004, "epoch": 0.44, "inbatch_neg_score": 0.3461, "inbatch_pos_score": 0.8887, "learning_rate": 1.777777777777778e-05, "loss": 3.7755, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.713, "num_token_overlap": 11.6548, "num_token_query": 31.2813, "num_token_union": 65.0104, "num_word_context": 202.2757, "num_word_doc": 49.7844, "num_word_query": 23.2315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2181.1559, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3457, "query_norm": 1.3243, "queue_k_norm": 1.4442, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2813, "sent_len_1": 66.713, "sent_len_max_0": 127.4188, "sent_len_max_1": 189.855, "stdk": 0.0476, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7712, "doc_norm": 1.4434, "encoder_q-embeddings": 2072.6138, "encoder_q-layer.0": 1498.3259, "encoder_q-layer.1": 1738.9115, "encoder_q-layer.10": 1237.5371, "encoder_q-layer.11": 3231.7756, "encoder_q-layer.2": 2066.6282, "encoder_q-layer.3": 2024.7094, "encoder_q-layer.4": 2098.071, "encoder_q-layer.5": 1763.5504, "encoder_q-layer.6": 1647.6838, "encoder_q-layer.7": 1480.5533, "encoder_q-layer.8": 1504.9005, "encoder_q-layer.9": 1213.304, "epoch": 0.44, "inbatch_neg_score": 0.3446, "inbatch_pos_score": 0.9014, "learning_rate": 1.7722222222222222e-05, "loss": 3.7712, "norm_diff": 0.1125, "norm_loss": 0.0, "num_token_doc": 66.7435, "num_token_overlap": 11.6513, "num_token_query": 31.4143, "num_token_union": 65.151, "num_word_context": 202.5604, "num_word_doc": 49.8286, "num_word_query": 23.3372, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2827.9378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.345, "query_norm": 1.331, "queue_k_norm": 1.4443, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4143, "sent_len_1": 66.7435, "sent_len_max_0": 127.4788, "sent_len_max_1": 188.7738, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.774, "doc_norm": 1.4476, "encoder_q-embeddings": 8033.3633, "encoder_q-layer.0": 5811.5234, "encoder_q-layer.1": 5397.8877, "encoder_q-layer.10": 1214.9209, "encoder_q-layer.11": 3226.5833, "encoder_q-layer.2": 6133.7046, "encoder_q-layer.3": 6869.6665, "encoder_q-layer.4": 6535.3882, "encoder_q-layer.5": 4899.3066, "encoder_q-layer.6": 3779.0342, "encoder_q-layer.7": 2284.8862, "encoder_q-layer.8": 1783.7295, "encoder_q-layer.9": 1213.0076, "epoch": 0.44, "inbatch_neg_score": 0.3449, "inbatch_pos_score": 0.896, "learning_rate": 1.7666666666666668e-05, "loss": 3.774, "norm_diff": 0.1204, "norm_loss": 0.0, "num_token_doc": 66.8813, "num_token_overlap": 11.685, "num_token_query": 31.4735, "num_token_union": 65.2319, "num_word_context": 202.8454, "num_word_doc": 49.9107, "num_word_query": 23.3846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7993.4985, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3452, "query_norm": 1.3272, "queue_k_norm": 1.4456, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4735, "sent_len_1": 66.8813, "sent_len_max_0": 127.4163, "sent_len_max_1": 189.5225, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7597, "doc_norm": 1.44, "encoder_q-embeddings": 1274.3812, "encoder_q-layer.0": 862.6111, "encoder_q-layer.1": 966.9285, "encoder_q-layer.10": 1313.402, "encoder_q-layer.11": 3296.8804, "encoder_q-layer.2": 1114.7056, "encoder_q-layer.3": 1107.4192, "encoder_q-layer.4": 1241.1284, "encoder_q-layer.5": 1417.2526, "encoder_q-layer.6": 1350.6656, "encoder_q-layer.7": 1330.1063, "encoder_q-layer.8": 1371.0613, "encoder_q-layer.9": 1201.4182, "epoch": 0.44, "inbatch_neg_score": 0.3474, "inbatch_pos_score": 0.9165, "learning_rate": 1.761111111111111e-05, "loss": 3.7597, "norm_diff": 0.097, "norm_loss": 0.0, "num_token_doc": 66.6998, "num_token_overlap": 11.6794, "num_token_query": 31.3176, "num_token_union": 65.051, "num_word_context": 202.3969, "num_word_doc": 49.7644, "num_word_query": 23.2419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2265.7817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3477, "query_norm": 1.343, "queue_k_norm": 1.4455, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3176, "sent_len_1": 66.6998, "sent_len_max_0": 127.5088, "sent_len_max_1": 190.1513, "stdk": 0.0477, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.779, "doc_norm": 1.4459, "encoder_q-embeddings": 1280.8507, "encoder_q-layer.0": 869.4965, "encoder_q-layer.1": 947.142, "encoder_q-layer.10": 1287.3378, "encoder_q-layer.11": 3247.6711, "encoder_q-layer.2": 1171.0565, "encoder_q-layer.3": 1229.8845, "encoder_q-layer.4": 1316.5684, "encoder_q-layer.5": 1178.4746, "encoder_q-layer.6": 1274.741, "encoder_q-layer.7": 1439.2367, "encoder_q-layer.8": 1543.0085, "encoder_q-layer.9": 1341.8793, "epoch": 0.45, "inbatch_neg_score": 0.3445, "inbatch_pos_score": 0.8975, "learning_rate": 1.7555555555555556e-05, "loss": 3.779, "norm_diff": 0.1075, "norm_loss": 0.0, "num_token_doc": 66.8495, "num_token_overlap": 11.6318, "num_token_query": 31.2276, "num_token_union": 65.0747, "num_word_context": 202.6191, "num_word_doc": 49.8597, "num_word_query": 23.181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2288.449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3428, "query_norm": 1.3384, "queue_k_norm": 1.4464, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2276, "sent_len_1": 66.8495, "sent_len_max_0": 127.24, "sent_len_max_1": 190.93, "stdk": 0.0478, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7778, "doc_norm": 1.4394, "encoder_q-embeddings": 1358.2633, "encoder_q-layer.0": 980.8969, "encoder_q-layer.1": 1084.0881, "encoder_q-layer.10": 1196.1154, "encoder_q-layer.11": 2928.0967, "encoder_q-layer.2": 1273.7313, "encoder_q-layer.3": 1266.8926, "encoder_q-layer.4": 1275.874, "encoder_q-layer.5": 1219.7085, "encoder_q-layer.6": 1125.5701, "encoder_q-layer.7": 1137.114, "encoder_q-layer.8": 1227.679, "encoder_q-layer.9": 1132.5516, "epoch": 0.45, "inbatch_neg_score": 0.3469, "inbatch_pos_score": 0.915, "learning_rate": 1.75e-05, "loss": 3.7778, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.8789, "num_token_overlap": 11.7468, "num_token_query": 31.4645, "num_token_union": 65.1941, "num_word_context": 202.4668, "num_word_doc": 49.9145, "num_word_query": 23.3826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2149.2911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3472, "query_norm": 1.3194, "queue_k_norm": 1.4464, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4645, "sent_len_1": 66.8789, "sent_len_max_0": 127.5413, "sent_len_max_1": 189.1287, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7754, "doc_norm": 1.45, "encoder_q-embeddings": 2009.4727, "encoder_q-layer.0": 1380.7654, "encoder_q-layer.1": 1539.8654, "encoder_q-layer.10": 1157.6013, "encoder_q-layer.11": 2925.9631, "encoder_q-layer.2": 1862.8198, "encoder_q-layer.3": 1872.525, "encoder_q-layer.4": 1952.3743, "encoder_q-layer.5": 1791.6399, "encoder_q-layer.6": 1652.5558, "encoder_q-layer.7": 1483.3221, "encoder_q-layer.8": 1430.6617, "encoder_q-layer.9": 1180.5187, "epoch": 0.45, "inbatch_neg_score": 0.3456, "inbatch_pos_score": 0.9004, "learning_rate": 1.7444444444444448e-05, "loss": 3.7754, "norm_diff": 0.1333, "norm_loss": 0.0, "num_token_doc": 66.6034, "num_token_overlap": 11.6261, "num_token_query": 31.292, "num_token_union": 65.0394, "num_word_context": 202.1736, "num_word_doc": 49.7293, "num_word_query": 23.2515, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2679.8461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3442, "query_norm": 1.3167, "queue_k_norm": 1.4453, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.292, "sent_len_1": 66.6034, "sent_len_max_0": 127.325, "sent_len_max_1": 189.4112, "stdk": 0.048, "stdq": 0.0418, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 3.7534, "doc_norm": 1.4461, "encoder_q-embeddings": 1373.0516, "encoder_q-layer.0": 935.6696, "encoder_q-layer.1": 1035.9485, "encoder_q-layer.10": 1375.5831, "encoder_q-layer.11": 3218.2136, "encoder_q-layer.2": 1185.6174, "encoder_q-layer.3": 1273.0486, "encoder_q-layer.4": 1287.4475, "encoder_q-layer.5": 1349.7957, "encoder_q-layer.6": 1427.3373, "encoder_q-layer.7": 1386.5415, "encoder_q-layer.8": 1449.2173, "encoder_q-layer.9": 1242.8566, "epoch": 0.45, "inbatch_neg_score": 0.3479, "inbatch_pos_score": 0.8818, "learning_rate": 1.738888888888889e-05, "loss": 3.7534, "norm_diff": 0.1122, "norm_loss": 0.0, "num_token_doc": 66.8899, "num_token_overlap": 11.734, "num_token_query": 31.5407, "num_token_union": 65.2554, "num_word_context": 202.5037, "num_word_doc": 49.9188, "num_word_query": 23.4399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2294.0716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3467, "query_norm": 1.3339, "queue_k_norm": 1.4463, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5407, "sent_len_1": 66.8899, "sent_len_max_0": 127.4838, "sent_len_max_1": 188.7512, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.7618, "doc_norm": 1.4459, "encoder_q-embeddings": 1706.2296, "encoder_q-layer.0": 1218.9666, "encoder_q-layer.1": 1319.4225, "encoder_q-layer.10": 1228.0592, "encoder_q-layer.11": 3116.3113, "encoder_q-layer.2": 1505.7488, "encoder_q-layer.3": 1588.2876, "encoder_q-layer.4": 1701.8286, "encoder_q-layer.5": 1760.5128, "encoder_q-layer.6": 1735.0094, "encoder_q-layer.7": 1618.1609, "encoder_q-layer.8": 1568.5355, "encoder_q-layer.9": 1210.8035, "epoch": 0.45, "inbatch_neg_score": 0.3496, "inbatch_pos_score": 0.894, "learning_rate": 1.7333333333333336e-05, "loss": 3.7618, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.7472, "num_token_overlap": 11.6409, "num_token_query": 31.3468, "num_token_union": 65.123, "num_word_context": 202.4377, "num_word_doc": 49.8043, "num_word_query": 23.2865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2604.2543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3496, "query_norm": 1.3347, "queue_k_norm": 1.4469, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3468, "sent_len_1": 66.7472, "sent_len_max_0": 127.4887, "sent_len_max_1": 188.38, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7673, "doc_norm": 1.4453, "encoder_q-embeddings": 1347.14, "encoder_q-layer.0": 903.6573, "encoder_q-layer.1": 1016.4886, "encoder_q-layer.10": 1281.8191, "encoder_q-layer.11": 3103.2693, "encoder_q-layer.2": 1160.7191, "encoder_q-layer.3": 1207.9646, "encoder_q-layer.4": 1231.2854, "encoder_q-layer.5": 1301.1097, "encoder_q-layer.6": 1295.5896, "encoder_q-layer.7": 1375.913, "encoder_q-layer.8": 1422.2751, "encoder_q-layer.9": 1216.2148, "epoch": 0.45, "inbatch_neg_score": 0.3483, "inbatch_pos_score": 0.9121, "learning_rate": 1.7277777777777778e-05, "loss": 3.7673, "norm_diff": 0.1053, "norm_loss": 0.0, "num_token_doc": 66.9156, "num_token_overlap": 11.6646, "num_token_query": 31.3469, "num_token_union": 65.1894, "num_word_context": 202.4511, "num_word_doc": 49.9003, "num_word_query": 23.2662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2233.5229, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3477, "query_norm": 1.34, "queue_k_norm": 1.4468, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3469, "sent_len_1": 66.9156, "sent_len_max_0": 127.4112, "sent_len_max_1": 191.465, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7603, "doc_norm": 1.4496, "encoder_q-embeddings": 1345.4286, "encoder_q-layer.0": 875.1658, "encoder_q-layer.1": 973.7068, "encoder_q-layer.10": 1341.3694, "encoder_q-layer.11": 3170.1265, "encoder_q-layer.2": 1032.3301, "encoder_q-layer.3": 1057.167, "encoder_q-layer.4": 1088.036, "encoder_q-layer.5": 1060.509, "encoder_q-layer.6": 1080.5143, "encoder_q-layer.7": 1152.261, "encoder_q-layer.8": 1305.0211, "encoder_q-layer.9": 1212.4159, "epoch": 0.45, "inbatch_neg_score": 0.3474, "inbatch_pos_score": 0.9111, "learning_rate": 1.7222222222222224e-05, "loss": 3.7603, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 66.8231, "num_token_overlap": 11.688, "num_token_query": 31.3709, "num_token_union": 65.1306, "num_word_context": 202.4665, "num_word_doc": 49.9003, "num_word_query": 23.3028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2170.4177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3467, "query_norm": 1.3297, "queue_k_norm": 1.4469, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3709, "sent_len_1": 66.8231, "sent_len_max_0": 127.5475, "sent_len_max_1": 188.9137, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7671, "doc_norm": 1.4521, "encoder_q-embeddings": 1135.7324, "encoder_q-layer.0": 754.9999, "encoder_q-layer.1": 776.7671, "encoder_q-layer.10": 1251.8636, "encoder_q-layer.11": 3264.4082, "encoder_q-layer.2": 921.0267, "encoder_q-layer.3": 927.4062, "encoder_q-layer.4": 951.6664, "encoder_q-layer.5": 935.8871, "encoder_q-layer.6": 1002.6299, "encoder_q-layer.7": 1112.4789, "encoder_q-layer.8": 1272.5726, "encoder_q-layer.9": 1138.5898, "epoch": 0.45, "inbatch_neg_score": 0.3524, "inbatch_pos_score": 0.9277, "learning_rate": 1.7166666666666666e-05, "loss": 3.7671, "norm_diff": 0.0966, "norm_loss": 0.0, "num_token_doc": 66.7665, "num_token_overlap": 11.6442, "num_token_query": 31.3329, "num_token_union": 65.0859, "num_word_context": 202.1448, "num_word_doc": 49.8142, "num_word_query": 23.2636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.3368, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3521, "query_norm": 1.3556, "queue_k_norm": 1.4479, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3329, "sent_len_1": 66.7665, "sent_len_max_0": 127.2325, "sent_len_max_1": 188.9712, "stdk": 0.048, "stdq": 0.0431, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7783, "doc_norm": 1.4452, "encoder_q-embeddings": 1071.2162, "encoder_q-layer.0": 726.4675, "encoder_q-layer.1": 777.2614, "encoder_q-layer.10": 1195.9143, "encoder_q-layer.11": 2907.022, "encoder_q-layer.2": 887.4712, "encoder_q-layer.3": 909.4057, "encoder_q-layer.4": 936.2946, "encoder_q-layer.5": 973.6796, "encoder_q-layer.6": 1019.065, "encoder_q-layer.7": 1107.4229, "encoder_q-layer.8": 1245.6395, "encoder_q-layer.9": 1097.3755, "epoch": 0.45, "inbatch_neg_score": 0.3492, "inbatch_pos_score": 0.9199, "learning_rate": 1.7111111111111112e-05, "loss": 3.7783, "norm_diff": 0.1045, "norm_loss": 0.0, "num_token_doc": 66.631, "num_token_overlap": 11.6714, "num_token_query": 31.4592, "num_token_union": 65.1042, "num_word_context": 202.4524, "num_word_doc": 49.7548, "num_word_query": 23.3635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1932.2034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3496, "query_norm": 1.3407, "queue_k_norm": 1.4461, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4592, "sent_len_1": 66.631, "sent_len_max_0": 127.5275, "sent_len_max_1": 190.5375, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7547, "doc_norm": 1.4461, "encoder_q-embeddings": 2572.0383, "encoder_q-layer.0": 1830.059, "encoder_q-layer.1": 1982.8185, "encoder_q-layer.10": 1294.2119, "encoder_q-layer.11": 2913.7019, "encoder_q-layer.2": 2135.9211, "encoder_q-layer.3": 1975.532, "encoder_q-layer.4": 1863.6799, "encoder_q-layer.5": 1736.2395, "encoder_q-layer.6": 1607.9531, "encoder_q-layer.7": 1469.5042, "encoder_q-layer.8": 1424.9604, "encoder_q-layer.9": 1136.9062, "epoch": 0.45, "inbatch_neg_score": 0.3515, "inbatch_pos_score": 0.9277, "learning_rate": 1.7055555555555554e-05, "loss": 3.7547, "norm_diff": 0.1029, "norm_loss": 0.0, "num_token_doc": 67.0675, "num_token_overlap": 11.7281, "num_token_query": 31.5431, "num_token_union": 65.3575, "num_word_context": 202.4694, "num_word_doc": 50.0399, "num_word_query": 23.4321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2914.0281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.354, "query_norm": 1.3432, "queue_k_norm": 1.448, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5431, "sent_len_1": 67.0675, "sent_len_max_0": 127.54, "sent_len_max_1": 188.7637, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7638, "doc_norm": 1.4501, "encoder_q-embeddings": 1708.4346, "encoder_q-layer.0": 1156.1359, "encoder_q-layer.1": 1267.9718, "encoder_q-layer.10": 1254.5771, "encoder_q-layer.11": 3084.5625, "encoder_q-layer.2": 1587.5273, "encoder_q-layer.3": 1538.9528, "encoder_q-layer.4": 1586.2026, "encoder_q-layer.5": 1570.1233, "encoder_q-layer.6": 1419.2797, "encoder_q-layer.7": 1416.7223, "encoder_q-layer.8": 1317.8054, "encoder_q-layer.9": 1171.3914, "epoch": 0.45, "inbatch_neg_score": 0.3543, "inbatch_pos_score": 0.9165, "learning_rate": 1.7000000000000003e-05, "loss": 3.7638, "norm_diff": 0.1043, "norm_loss": 0.0, "num_token_doc": 66.7445, "num_token_overlap": 11.6797, "num_token_query": 31.4411, "num_token_union": 65.0932, "num_word_context": 202.1551, "num_word_doc": 49.7609, "num_word_query": 23.3219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2498.2042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.3458, "queue_k_norm": 1.4495, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4411, "sent_len_1": 66.7445, "sent_len_max_0": 127.605, "sent_len_max_1": 190.5525, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7711, "doc_norm": 1.4478, "encoder_q-embeddings": 1318.6344, "encoder_q-layer.0": 882.6578, "encoder_q-layer.1": 1015.3754, "encoder_q-layer.10": 1223.2401, "encoder_q-layer.11": 2955.895, "encoder_q-layer.2": 955.6144, "encoder_q-layer.3": 945.3085, "encoder_q-layer.4": 954.1054, "encoder_q-layer.5": 1010.0998, "encoder_q-layer.6": 1055.1459, "encoder_q-layer.7": 1124.0448, "encoder_q-layer.8": 1291.3459, "encoder_q-layer.9": 1169.5825, "epoch": 0.45, "inbatch_neg_score": 0.3613, "inbatch_pos_score": 0.9248, "learning_rate": 1.6944444444444446e-05, "loss": 3.7711, "norm_diff": 0.0901, "norm_loss": 0.0, "num_token_doc": 66.7711, "num_token_overlap": 11.6992, "num_token_query": 31.3042, "num_token_union": 64.9872, "num_word_context": 202.2983, "num_word_doc": 49.821, "num_word_query": 23.2371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.2511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3618, "query_norm": 1.3578, "queue_k_norm": 1.4482, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3042, "sent_len_1": 66.7711, "sent_len_max_0": 127.5125, "sent_len_max_1": 189.4325, "stdk": 0.0478, "stdq": 0.0428, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7678, "doc_norm": 1.4479, "encoder_q-embeddings": 2850.625, "encoder_q-layer.0": 1972.3502, "encoder_q-layer.1": 2412.5237, "encoder_q-layer.10": 1206.1583, "encoder_q-layer.11": 3087.1023, "encoder_q-layer.2": 2645.5627, "encoder_q-layer.3": 2580.1545, "encoder_q-layer.4": 2672.0977, "encoder_q-layer.5": 2923.6875, "encoder_q-layer.6": 3272.793, "encoder_q-layer.7": 3085.8247, "encoder_q-layer.8": 2336.2969, "encoder_q-layer.9": 1423.887, "epoch": 0.45, "inbatch_neg_score": 0.3613, "inbatch_pos_score": 0.9219, "learning_rate": 1.688888888888889e-05, "loss": 3.7678, "norm_diff": 0.1082, "norm_loss": 0.0, "num_token_doc": 66.6796, "num_token_overlap": 11.6772, "num_token_query": 31.3768, "num_token_union": 65.0699, "num_word_context": 202.2734, "num_word_doc": 49.7719, "num_word_query": 23.3127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3863.4613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3606, "query_norm": 1.3397, "queue_k_norm": 1.4485, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3768, "sent_len_1": 66.6796, "sent_len_max_0": 127.4675, "sent_len_max_1": 189.6025, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7758, "doc_norm": 1.4534, "encoder_q-embeddings": 1878.3428, "encoder_q-layer.0": 1344.7678, "encoder_q-layer.1": 1513.8293, "encoder_q-layer.10": 1252.6946, "encoder_q-layer.11": 2974.0024, "encoder_q-layer.2": 1913.4462, "encoder_q-layer.3": 1747.3586, "encoder_q-layer.4": 1549.8683, "encoder_q-layer.5": 1377.2897, "encoder_q-layer.6": 1320.0125, "encoder_q-layer.7": 1267.746, "encoder_q-layer.8": 1217.9423, "encoder_q-layer.9": 1125.4071, "epoch": 0.45, "inbatch_neg_score": 0.3658, "inbatch_pos_score": 0.9258, "learning_rate": 1.6833333333333334e-05, "loss": 3.7758, "norm_diff": 0.1089, "norm_loss": 0.0, "num_token_doc": 66.8079, "num_token_overlap": 11.6879, "num_token_query": 31.3848, "num_token_union": 65.1267, "num_word_context": 202.088, "num_word_doc": 49.8392, "num_word_query": 23.2903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2540.5713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.3445, "queue_k_norm": 1.4483, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3848, "sent_len_1": 66.8079, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.1113, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7696, "doc_norm": 1.4542, "encoder_q-embeddings": 2275.0513, "encoder_q-layer.0": 1509.1366, "encoder_q-layer.1": 1552.0979, "encoder_q-layer.10": 2738.4241, "encoder_q-layer.11": 6055.0288, "encoder_q-layer.2": 1755.7947, "encoder_q-layer.3": 1950.8612, "encoder_q-layer.4": 2035.1801, "encoder_q-layer.5": 1955.4008, "encoder_q-layer.6": 1986.6113, "encoder_q-layer.7": 2146.7405, "encoder_q-layer.8": 2575.0366, "encoder_q-layer.9": 2229.9189, "epoch": 0.45, "inbatch_neg_score": 0.3657, "inbatch_pos_score": 0.9399, "learning_rate": 1.677777777777778e-05, "loss": 3.7696, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.689, "num_token_overlap": 11.6622, "num_token_query": 31.4146, "num_token_union": 65.0868, "num_word_context": 202.5321, "num_word_doc": 49.7574, "num_word_query": 23.3395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3998.2828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3669, "query_norm": 1.3554, "queue_k_norm": 1.4493, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4146, "sent_len_1": 66.689, "sent_len_max_0": 127.4712, "sent_len_max_1": 190.9787, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.742, "doc_norm": 1.4515, "encoder_q-embeddings": 3647.9971, "encoder_q-layer.0": 2612.3364, "encoder_q-layer.1": 3211.5171, "encoder_q-layer.10": 2443.1628, "encoder_q-layer.11": 6111.5117, "encoder_q-layer.2": 3927.2263, "encoder_q-layer.3": 4276.7603, "encoder_q-layer.4": 5284.6294, "encoder_q-layer.5": 5550.9082, "encoder_q-layer.6": 3765.9048, "encoder_q-layer.7": 3482.9238, "encoder_q-layer.8": 3055.3579, "encoder_q-layer.9": 2450.1875, "epoch": 0.45, "inbatch_neg_score": 0.3706, "inbatch_pos_score": 0.939, "learning_rate": 1.6722222222222222e-05, "loss": 3.742, "norm_diff": 0.1016, "norm_loss": 0.0, "num_token_doc": 66.689, "num_token_overlap": 11.6603, "num_token_query": 31.3378, "num_token_union": 65.0538, "num_word_context": 202.1362, "num_word_doc": 49.7658, "num_word_query": 23.277, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5939.0176, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3706, "query_norm": 1.3499, "queue_k_norm": 1.4508, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3378, "sent_len_1": 66.689, "sent_len_max_0": 127.5113, "sent_len_max_1": 188.5037, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.7625, "doc_norm": 1.4472, "encoder_q-embeddings": 2227.6814, "encoder_q-layer.0": 1433.668, "encoder_q-layer.1": 1463.4216, "encoder_q-layer.10": 2573.627, "encoder_q-layer.11": 6437.2754, "encoder_q-layer.2": 1680.3367, "encoder_q-layer.3": 1700.5499, "encoder_q-layer.4": 1812.4657, "encoder_q-layer.5": 1806.1913, "encoder_q-layer.6": 1971.7053, "encoder_q-layer.7": 2232.5684, "encoder_q-layer.8": 2561.3669, "encoder_q-layer.9": 2417.5471, "epoch": 0.46, "inbatch_neg_score": 0.371, "inbatch_pos_score": 0.9351, "learning_rate": 1.6666666666666667e-05, "loss": 3.7625, "norm_diff": 0.1007, "norm_loss": 0.0, "num_token_doc": 66.9643, "num_token_overlap": 11.7168, "num_token_query": 31.497, "num_token_union": 65.2792, "num_word_context": 202.358, "num_word_doc": 49.9783, "num_word_query": 23.399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4114.449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3708, "query_norm": 1.3466, "queue_k_norm": 1.451, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.497, "sent_len_1": 66.9643, "sent_len_max_0": 127.5975, "sent_len_max_1": 191.7625, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 29.4196, "dev_samples_per_second": 2.175, "dev_steps_per_second": 0.034, "epoch": 0.46, "step": 70000, "test_accuracy": 92.87109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4094051718711853, "test_doc_norm": 1.4126441478729248, "test_inbatch_neg_score": 0.6929380297660828, "test_inbatch_pos_score": 1.5828536748886108, "test_loss": 0.4094051718711853, "test_loss_align": 0.9880537986755371, "test_loss_unif": 3.7412986755371094, "test_loss_unif_q@queue": 3.7412984371185303, "test_norm_diff": 0.04631619155406952, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3526493310928345, "test_query_norm": 1.4589602947235107, "test_queue_k_norm": 1.4511405229568481, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135530814528465, "test_stdq": 0.04158562421798706, "test_stdqueue_k": 0.04793735593557358, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.4196, "dev_samples_per_second": 2.175, "dev_steps_per_second": 0.034, "epoch": 0.46, "eval_beir-arguana_ndcg@10": 0.36948, "eval_beir-arguana_recall@10": 0.63016, "eval_beir-arguana_recall@100": 0.93314, "eval_beir-arguana_recall@20": 0.76956, "eval_beir-avg_ndcg@10": 0.37403766666666666, "eval_beir-avg_recall@10": 0.4443060833333333, "eval_beir-avg_recall@100": 0.63266775, "eval_beir-avg_recall@20": 0.5089735, "eval_beir-cqadupstack_ndcg@10": 0.25705666666666666, "eval_beir-cqadupstack_recall@10": 0.35303083333333335, "eval_beir-cqadupstack_recall@100": 0.5872375, "eval_beir-cqadupstack_recall@20": 0.42159499999999994, "eval_beir-fiqa_ndcg@10": 0.2396, "eval_beir-fiqa_recall@10": 0.29801, "eval_beir-fiqa_recall@100": 0.57039, "eval_beir-fiqa_recall@20": 0.38044, "eval_beir-nfcorpus_ndcg@10": 0.30221, "eval_beir-nfcorpus_recall@10": 0.14496, "eval_beir-nfcorpus_recall@100": 0.28181, "eval_beir-nfcorpus_recall@20": 0.18167, "eval_beir-nq_ndcg@10": 0.27577, "eval_beir-nq_recall@10": 0.4501, "eval_beir-nq_recall@100": 0.79739, "eval_beir-nq_recall@20": 0.5724, "eval_beir-quora_ndcg@10": 0.76442, "eval_beir-quora_recall@10": 0.87492, "eval_beir-quora_recall@100": 0.97629, "eval_beir-quora_recall@20": 0.92059, "eval_beir-scidocs_ndcg@10": 0.14914, "eval_beir-scidocs_recall@10": 0.15543, "eval_beir-scidocs_recall@100": 0.3646, "eval_beir-scidocs_recall@20": 0.21397, "eval_beir-scifact_ndcg@10": 0.62764, "eval_beir-scifact_recall@10": 0.78622, "eval_beir-scifact_recall@100": 0.91878, "eval_beir-scifact_recall@20": 0.83133, "eval_beir-trec-covid_ndcg@10": 0.57793, "eval_beir-trec-covid_recall@10": 0.624, "eval_beir-trec-covid_recall@100": 0.4586, "eval_beir-trec-covid_recall@20": 0.584, "eval_beir-webis-touche2020_ndcg@10": 0.17713, "eval_beir-webis-touche2020_recall@10": 0.12623, "eval_beir-webis-touche2020_recall@100": 0.43844, "eval_beir-webis-touche2020_recall@20": 0.21418, "eval_senteval-avg_sts": 0.7604239524732033, "eval_senteval-sickr_spearman": 0.7274774773553068, "eval_senteval-stsb_spearman": 0.7933704275910998, "step": 70000, "test_accuracy": 92.87109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4094051718711853, "test_doc_norm": 1.4126441478729248, "test_inbatch_neg_score": 0.6929380297660828, "test_inbatch_pos_score": 1.5828536748886108, "test_loss": 0.4094051718711853, "test_loss_align": 0.9880537986755371, "test_loss_unif": 3.7412986755371094, "test_loss_unif_q@queue": 3.7412984371185303, "test_norm_diff": 0.04631619155406952, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3526493310928345, "test_query_norm": 1.4589602947235107, "test_queue_k_norm": 1.4511405229568481, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135530814528465, "test_stdq": 0.04158562421798706, "test_stdqueue_k": 0.04793735593557358, "test_stdqueue_q": 0.0 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7648, "doc_norm": 1.457, "encoder_q-embeddings": 1930.8152, "encoder_q-layer.0": 1243.7769, "encoder_q-layer.1": 1287.665, "encoder_q-layer.10": 2398.1602, "encoder_q-layer.11": 6316.1274, "encoder_q-layer.2": 1402.8851, "encoder_q-layer.3": 1489.156, "encoder_q-layer.4": 1572.4678, "encoder_q-layer.5": 1646.0106, "encoder_q-layer.6": 1841.6923, "encoder_q-layer.7": 2049.5759, "encoder_q-layer.8": 2435.709, "encoder_q-layer.9": 2254.6582, "epoch": 0.46, "inbatch_neg_score": 0.3719, "inbatch_pos_score": 0.9365, "learning_rate": 1.661111111111111e-05, "loss": 3.7648, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.7006, "num_token_overlap": 11.6616, "num_token_query": 31.3792, "num_token_union": 65.0792, "num_word_context": 202.0141, "num_word_doc": 49.7446, "num_word_query": 23.3027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3900.5588, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.34, "queue_k_norm": 1.4501, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3792, "sent_len_1": 66.7006, "sent_len_max_0": 127.5512, "sent_len_max_1": 189.41, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7641, "doc_norm": 1.4487, "encoder_q-embeddings": 2001.5629, "encoder_q-layer.0": 1315.7207, "encoder_q-layer.1": 1387.5303, "encoder_q-layer.10": 2764.1033, "encoder_q-layer.11": 6801.2085, "encoder_q-layer.2": 1565.8453, "encoder_q-layer.3": 1653.463, "encoder_q-layer.4": 1779.8236, "encoder_q-layer.5": 1848.3057, "encoder_q-layer.6": 2049.3472, "encoder_q-layer.7": 2321.9277, "encoder_q-layer.8": 2768.2507, "encoder_q-layer.9": 2606.8928, "epoch": 0.46, "inbatch_neg_score": 0.3734, "inbatch_pos_score": 0.9321, "learning_rate": 1.655555555555556e-05, "loss": 3.7641, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.6432, "num_token_overlap": 11.6255, "num_token_query": 31.174, "num_token_union": 64.9745, "num_word_context": 202.2723, "num_word_doc": 49.7694, "num_word_query": 23.1548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4241.978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.3536, "queue_k_norm": 1.4521, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.174, "sent_len_1": 66.6432, "sent_len_max_0": 127.4675, "sent_len_max_1": 187.55, "stdk": 0.0477, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7596, "doc_norm": 1.4615, "encoder_q-embeddings": 3518.8303, "encoder_q-layer.0": 2550.6287, "encoder_q-layer.1": 2741.8418, "encoder_q-layer.10": 2575.2168, "encoder_q-layer.11": 6283.2715, "encoder_q-layer.2": 2896.5234, "encoder_q-layer.3": 3178.033, "encoder_q-layer.4": 3086.6606, "encoder_q-layer.5": 3117.3022, "encoder_q-layer.6": 3204.1897, "encoder_q-layer.7": 3102.1292, "encoder_q-layer.8": 2810.6506, "encoder_q-layer.9": 2380.1089, "epoch": 0.46, "inbatch_neg_score": 0.3716, "inbatch_pos_score": 0.9399, "learning_rate": 1.65e-05, "loss": 3.7596, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.7038, "num_token_overlap": 11.6681, "num_token_query": 31.4079, "num_token_union": 65.1269, "num_word_context": 202.4513, "num_word_doc": 49.7715, "num_word_query": 23.3291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5066.2294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3721, "query_norm": 1.342, "queue_k_norm": 1.4532, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4079, "sent_len_1": 66.7038, "sent_len_max_0": 127.4425, "sent_len_max_1": 189.57, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7512, "doc_norm": 1.4561, "encoder_q-embeddings": 3852.3108, "encoder_q-layer.0": 2771.4077, "encoder_q-layer.1": 3291.8464, "encoder_q-layer.10": 2700.5647, "encoder_q-layer.11": 6501.1982, "encoder_q-layer.2": 3660.7139, "encoder_q-layer.3": 4233.4429, "encoder_q-layer.4": 4419.5283, "encoder_q-layer.5": 4597.2974, "encoder_q-layer.6": 4611.2627, "encoder_q-layer.7": 4020.4456, "encoder_q-layer.8": 2783.1548, "encoder_q-layer.9": 2567.6113, "epoch": 0.46, "inbatch_neg_score": 0.3713, "inbatch_pos_score": 0.9302, "learning_rate": 1.6444444444444447e-05, "loss": 3.7512, "norm_diff": 0.1142, "norm_loss": 0.0, "num_token_doc": 66.847, "num_token_overlap": 11.705, "num_token_query": 31.5007, "num_token_union": 65.1758, "num_word_context": 202.2701, "num_word_doc": 49.8793, "num_word_query": 23.3972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5964.3856, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3721, "query_norm": 1.3419, "queue_k_norm": 1.4518, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5007, "sent_len_1": 66.847, "sent_len_max_0": 127.4562, "sent_len_max_1": 189.0437, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7697, "doc_norm": 1.4476, "encoder_q-embeddings": 2118.2153, "encoder_q-layer.0": 1416.6555, "encoder_q-layer.1": 1539.8718, "encoder_q-layer.10": 2462.9675, "encoder_q-layer.11": 6012.79, "encoder_q-layer.2": 1670.9244, "encoder_q-layer.3": 1700.8401, "encoder_q-layer.4": 1782.3762, "encoder_q-layer.5": 1772.322, "encoder_q-layer.6": 1915.2744, "encoder_q-layer.7": 2193.8352, "encoder_q-layer.8": 2505.772, "encoder_q-layer.9": 2359.5073, "epoch": 0.46, "inbatch_neg_score": 0.3786, "inbatch_pos_score": 0.9316, "learning_rate": 1.638888888888889e-05, "loss": 3.7697, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.9184, "num_token_overlap": 11.654, "num_token_query": 31.4146, "num_token_union": 65.2406, "num_word_context": 202.6739, "num_word_doc": 49.9457, "num_word_query": 23.3219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3921.6751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3779, "query_norm": 1.3425, "queue_k_norm": 1.455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4146, "sent_len_1": 66.9184, "sent_len_max_0": 127.4137, "sent_len_max_1": 190.6738, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7605, "doc_norm": 1.4632, "encoder_q-embeddings": 2419.7002, "encoder_q-layer.0": 1669.6147, "encoder_q-layer.1": 1818.1047, "encoder_q-layer.10": 2665.5557, "encoder_q-layer.11": 6336.9204, "encoder_q-layer.2": 2090.1875, "encoder_q-layer.3": 2092.3809, "encoder_q-layer.4": 2149.199, "encoder_q-layer.5": 2113.6196, "encoder_q-layer.6": 2178.5215, "encoder_q-layer.7": 2269.1289, "encoder_q-layer.8": 2583.9377, "encoder_q-layer.9": 2434.5005, "epoch": 0.46, "inbatch_neg_score": 0.3794, "inbatch_pos_score": 0.9385, "learning_rate": 1.6333333333333335e-05, "loss": 3.7605, "norm_diff": 0.1098, "norm_loss": 0.0, "num_token_doc": 66.8014, "num_token_overlap": 11.6947, "num_token_query": 31.4349, "num_token_union": 65.1634, "num_word_context": 202.3781, "num_word_doc": 49.8543, "num_word_query": 23.3602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4364.3896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3789, "query_norm": 1.3534, "queue_k_norm": 1.4541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4349, "sent_len_1": 66.8014, "sent_len_max_0": 127.4788, "sent_len_max_1": 188.7562, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.758, "doc_norm": 1.4555, "encoder_q-embeddings": 3055.7639, "encoder_q-layer.0": 1932.3799, "encoder_q-layer.1": 2084.8694, "encoder_q-layer.10": 2699.9246, "encoder_q-layer.11": 6709.835, "encoder_q-layer.2": 2307.7397, "encoder_q-layer.3": 2365.0745, "encoder_q-layer.4": 2562.2549, "encoder_q-layer.5": 2552.7253, "encoder_q-layer.6": 2676.6067, "encoder_q-layer.7": 2468.3901, "encoder_q-layer.8": 2727.7466, "encoder_q-layer.9": 2418.9705, "epoch": 0.46, "inbatch_neg_score": 0.3756, "inbatch_pos_score": 0.938, "learning_rate": 1.6277777777777777e-05, "loss": 3.758, "norm_diff": 0.1065, "norm_loss": 0.0, "num_token_doc": 66.9952, "num_token_overlap": 11.681, "num_token_query": 31.3873, "num_token_union": 65.2288, "num_word_context": 202.4372, "num_word_doc": 50.0113, "num_word_query": 23.306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4694.5555, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.376, "query_norm": 1.349, "queue_k_norm": 1.4548, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3873, "sent_len_1": 66.9952, "sent_len_max_0": 127.5925, "sent_len_max_1": 189.5563, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7893, "doc_norm": 1.4572, "encoder_q-embeddings": 3776.5466, "encoder_q-layer.0": 2801.2939, "encoder_q-layer.1": 3227.7822, "encoder_q-layer.10": 2712.2112, "encoder_q-layer.11": 6667.9131, "encoder_q-layer.2": 3827.4343, "encoder_q-layer.3": 4206.8071, "encoder_q-layer.4": 4389.5796, "encoder_q-layer.5": 4713.3389, "encoder_q-layer.6": 4052.7271, "encoder_q-layer.7": 3347.1306, "encoder_q-layer.8": 2905.395, "encoder_q-layer.9": 2532.2878, "epoch": 0.46, "inbatch_neg_score": 0.3782, "inbatch_pos_score": 0.9248, "learning_rate": 1.6222222222222223e-05, "loss": 3.7893, "norm_diff": 0.1299, "norm_loss": 0.0, "num_token_doc": 66.7468, "num_token_overlap": 11.6323, "num_token_query": 31.3663, "num_token_union": 65.1099, "num_word_context": 202.3748, "num_word_doc": 49.8052, "num_word_query": 23.3064, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5884.7918, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3784, "query_norm": 1.3273, "queue_k_norm": 1.4533, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3663, "sent_len_1": 66.7468, "sent_len_max_0": 127.5763, "sent_len_max_1": 190.2725, "stdk": 0.0479, "stdq": 0.0417, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7595, "doc_norm": 1.4631, "encoder_q-embeddings": 1280.9458, "encoder_q-layer.0": 917.2194, "encoder_q-layer.1": 975.264, "encoder_q-layer.10": 1332.4082, "encoder_q-layer.11": 3035.7004, "encoder_q-layer.2": 1134.3252, "encoder_q-layer.3": 1141.0864, "encoder_q-layer.4": 1095.9347, "encoder_q-layer.5": 1177.9395, "encoder_q-layer.6": 1191.5222, "encoder_q-layer.7": 1082.7588, "encoder_q-layer.8": 1174.1571, "encoder_q-layer.9": 1099.6993, "epoch": 0.46, "inbatch_neg_score": 0.3776, "inbatch_pos_score": 0.9443, "learning_rate": 1.6166666666666665e-05, "loss": 3.7595, "norm_diff": 0.1152, "norm_loss": 0.0, "num_token_doc": 66.6897, "num_token_overlap": 11.6456, "num_token_query": 31.3855, "num_token_union": 65.0683, "num_word_context": 202.3966, "num_word_doc": 49.7773, "num_word_query": 23.3229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2086.0474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3779, "query_norm": 1.348, "queue_k_norm": 1.4561, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3855, "sent_len_1": 66.6897, "sent_len_max_0": 127.5288, "sent_len_max_1": 190.9575, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7517, "doc_norm": 1.4552, "encoder_q-embeddings": 1399.5203, "encoder_q-layer.0": 951.1721, "encoder_q-layer.1": 1100.2881, "encoder_q-layer.10": 1210.6981, "encoder_q-layer.11": 3093.5916, "encoder_q-layer.2": 1271.4303, "encoder_q-layer.3": 1284.7509, "encoder_q-layer.4": 1282.7467, "encoder_q-layer.5": 1223.2405, "encoder_q-layer.6": 1255.5914, "encoder_q-layer.7": 1350.9838, "encoder_q-layer.8": 1421.4174, "encoder_q-layer.9": 1231.9919, "epoch": 0.46, "inbatch_neg_score": 0.3793, "inbatch_pos_score": 0.9604, "learning_rate": 1.6111111111111115e-05, "loss": 3.7517, "norm_diff": 0.1001, "norm_loss": 0.0, "num_token_doc": 66.6191, "num_token_overlap": 11.64, "num_token_query": 31.2683, "num_token_union": 64.9809, "num_word_context": 202.1503, "num_word_doc": 49.7112, "num_word_query": 23.2204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2280.6726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3789, "query_norm": 1.3551, "queue_k_norm": 1.4554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2683, "sent_len_1": 66.6191, "sent_len_max_0": 127.44, "sent_len_max_1": 189.725, "stdk": 0.0478, "stdq": 0.0428, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7702, "doc_norm": 1.4604, "encoder_q-embeddings": 1276.3445, "encoder_q-layer.0": 856.4527, "encoder_q-layer.1": 943.1135, "encoder_q-layer.10": 1208.4575, "encoder_q-layer.11": 3038.5315, "encoder_q-layer.2": 1055.8381, "encoder_q-layer.3": 1133.9779, "encoder_q-layer.4": 1279.1676, "encoder_q-layer.5": 1211.9712, "encoder_q-layer.6": 1265.7737, "encoder_q-layer.7": 1283.2539, "encoder_q-layer.8": 1351.3802, "encoder_q-layer.9": 1150.0024, "epoch": 0.46, "inbatch_neg_score": 0.3803, "inbatch_pos_score": 0.9243, "learning_rate": 1.6055555555555557e-05, "loss": 3.7702, "norm_diff": 0.1339, "norm_loss": 0.0, "num_token_doc": 66.5895, "num_token_overlap": 11.6153, "num_token_query": 31.2416, "num_token_union": 65.0023, "num_word_context": 201.9322, "num_word_doc": 49.7197, "num_word_query": 23.1682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2169.047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3806, "query_norm": 1.3264, "queue_k_norm": 1.4557, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2416, "sent_len_1": 66.5895, "sent_len_max_0": 127.4513, "sent_len_max_1": 188.9387, "stdk": 0.048, "stdq": 0.0416, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7564, "doc_norm": 1.4619, "encoder_q-embeddings": 1423.5798, "encoder_q-layer.0": 979.7248, "encoder_q-layer.1": 1054.9644, "encoder_q-layer.10": 1356.9274, "encoder_q-layer.11": 3303.1484, "encoder_q-layer.2": 1273.678, "encoder_q-layer.3": 1362.0768, "encoder_q-layer.4": 1495.51, "encoder_q-layer.5": 1425.6729, "encoder_q-layer.6": 1561.5583, "encoder_q-layer.7": 1511.1655, "encoder_q-layer.8": 1363.8513, "encoder_q-layer.9": 1241.8813, "epoch": 0.46, "inbatch_neg_score": 0.3748, "inbatch_pos_score": 0.9297, "learning_rate": 1.6000000000000003e-05, "loss": 3.7564, "norm_diff": 0.1292, "norm_loss": 0.0, "num_token_doc": 66.6246, "num_token_overlap": 11.6975, "num_token_query": 31.4998, "num_token_union": 65.0669, "num_word_context": 201.9062, "num_word_doc": 49.7404, "num_word_query": 23.3956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2395.1094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.375, "query_norm": 1.3326, "queue_k_norm": 1.458, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4998, "sent_len_1": 66.6246, "sent_len_max_0": 127.4275, "sent_len_max_1": 189.82, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.7728, "doc_norm": 1.454, "encoder_q-embeddings": 2249.7654, "encoder_q-layer.0": 1566.9724, "encoder_q-layer.1": 1689.0281, "encoder_q-layer.10": 1273.8151, "encoder_q-layer.11": 3118.718, "encoder_q-layer.2": 1907.0715, "encoder_q-layer.3": 1907.6162, "encoder_q-layer.4": 1918.7048, "encoder_q-layer.5": 1887.9126, "encoder_q-layer.6": 1786.3068, "encoder_q-layer.7": 1599.6007, "encoder_q-layer.8": 1499.8065, "encoder_q-layer.9": 1290.4431, "epoch": 0.46, "inbatch_neg_score": 0.3764, "inbatch_pos_score": 0.9204, "learning_rate": 1.5944444444444445e-05, "loss": 3.7728, "norm_diff": 0.119, "norm_loss": 0.0, "num_token_doc": 66.6576, "num_token_overlap": 11.6438, "num_token_query": 31.3134, "num_token_union": 65.0276, "num_word_context": 201.9968, "num_word_doc": 49.7525, "num_word_query": 23.2443, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2914.3912, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.377, "query_norm": 1.335, "queue_k_norm": 1.456, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3134, "sent_len_1": 66.6576, "sent_len_max_0": 127.3312, "sent_len_max_1": 189.7363, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7616, "doc_norm": 1.4631, "encoder_q-embeddings": 2441.396, "encoder_q-layer.0": 1716.3905, "encoder_q-layer.1": 1984.098, "encoder_q-layer.10": 1375.5491, "encoder_q-layer.11": 3250.1094, "encoder_q-layer.2": 2225.6013, "encoder_q-layer.3": 2283.0193, "encoder_q-layer.4": 2388.4668, "encoder_q-layer.5": 2343.8311, "encoder_q-layer.6": 2150.3203, "encoder_q-layer.7": 1904.2616, "encoder_q-layer.8": 1689.147, "encoder_q-layer.9": 1329.9755, "epoch": 0.46, "inbatch_neg_score": 0.3805, "inbatch_pos_score": 0.9438, "learning_rate": 1.588888888888889e-05, "loss": 3.7616, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.8666, "num_token_overlap": 11.66, "num_token_query": 31.2997, "num_token_union": 65.1148, "num_word_context": 202.1855, "num_word_doc": 49.8827, "num_word_query": 23.2262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3206.3016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3799, "query_norm": 1.3679, "queue_k_norm": 1.456, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2997, "sent_len_1": 66.8666, "sent_len_max_0": 127.545, "sent_len_max_1": 188.7862, "stdk": 0.048, "stdq": 0.0434, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 3.7684, "doc_norm": 1.4604, "encoder_q-embeddings": 3040.845, "encoder_q-layer.0": 2113.8784, "encoder_q-layer.1": 2498.4722, "encoder_q-layer.10": 1272.6379, "encoder_q-layer.11": 3242.5479, "encoder_q-layer.2": 3097.3918, "encoder_q-layer.3": 3695.0383, "encoder_q-layer.4": 3972.064, "encoder_q-layer.5": 3571.4451, "encoder_q-layer.6": 3975.2798, "encoder_q-layer.7": 2959.3218, "encoder_q-layer.8": 2295.1543, "encoder_q-layer.9": 1330.9294, "epoch": 0.47, "inbatch_neg_score": 0.3764, "inbatch_pos_score": 0.9355, "learning_rate": 1.5833333333333333e-05, "loss": 3.7684, "norm_diff": 0.115, "norm_loss": 0.0, "num_token_doc": 66.914, "num_token_overlap": 11.5999, "num_token_query": 31.1556, "num_token_union": 65.1415, "num_word_context": 202.386, "num_word_doc": 49.8856, "num_word_query": 23.1065, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4431.0571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.3455, "queue_k_norm": 1.4592, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1556, "sent_len_1": 66.914, "sent_len_max_0": 127.4237, "sent_len_max_1": 190.405, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.7523, "doc_norm": 1.4617, "encoder_q-embeddings": 1160.1956, "encoder_q-layer.0": 779.9612, "encoder_q-layer.1": 823.9305, "encoder_q-layer.10": 1294.947, "encoder_q-layer.11": 3159.3298, "encoder_q-layer.2": 911.8644, "encoder_q-layer.3": 965.2357, "encoder_q-layer.4": 1050.5007, "encoder_q-layer.5": 1105.4753, "encoder_q-layer.6": 1181.1393, "encoder_q-layer.7": 1291.4009, "encoder_q-layer.8": 1368.3965, "encoder_q-layer.9": 1224.0542, "epoch": 0.47, "inbatch_neg_score": 0.3809, "inbatch_pos_score": 0.9453, "learning_rate": 1.577777777777778e-05, "loss": 3.7523, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.755, "num_token_overlap": 11.7204, "num_token_query": 31.3885, "num_token_union": 65.0834, "num_word_context": 202.2684, "num_word_doc": 49.8042, "num_word_query": 23.3179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.8496, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3811, "query_norm": 1.3454, "queue_k_norm": 1.4569, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3885, "sent_len_1": 66.755, "sent_len_max_0": 127.3838, "sent_len_max_1": 188.8975, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.768, "doc_norm": 1.4649, "encoder_q-embeddings": 1370.6246, "encoder_q-layer.0": 914.1454, "encoder_q-layer.1": 1058.504, "encoder_q-layer.10": 1363.4198, "encoder_q-layer.11": 2964.9119, "encoder_q-layer.2": 1178.1995, "encoder_q-layer.3": 1244.8865, "encoder_q-layer.4": 1217.3091, "encoder_q-layer.5": 1138.3683, "encoder_q-layer.6": 1155.6506, "encoder_q-layer.7": 1150.6772, "encoder_q-layer.8": 1313.9177, "encoder_q-layer.9": 1196.8922, "epoch": 0.47, "inbatch_neg_score": 0.3803, "inbatch_pos_score": 0.9565, "learning_rate": 1.5722222222222225e-05, "loss": 3.768, "norm_diff": 0.1125, "norm_loss": 0.0, "num_token_doc": 66.5966, "num_token_overlap": 11.634, "num_token_query": 31.1684, "num_token_union": 64.917, "num_word_context": 202.0746, "num_word_doc": 49.6753, "num_word_query": 23.1218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2159.5668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3806, "query_norm": 1.3523, "queue_k_norm": 1.4582, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1684, "sent_len_1": 66.5966, "sent_len_max_0": 127.3988, "sent_len_max_1": 190.065, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7667, "doc_norm": 1.4646, "encoder_q-embeddings": 2200.0928, "encoder_q-layer.0": 1516.4366, "encoder_q-layer.1": 1751.9198, "encoder_q-layer.10": 1164.1221, "encoder_q-layer.11": 3086.8376, "encoder_q-layer.2": 1860.7366, "encoder_q-layer.3": 2135.5256, "encoder_q-layer.4": 2196.4043, "encoder_q-layer.5": 2325.4199, "encoder_q-layer.6": 2728.1951, "encoder_q-layer.7": 2565.9109, "encoder_q-layer.8": 1844.8579, "encoder_q-layer.9": 1194.624, "epoch": 0.47, "inbatch_neg_score": 0.3792, "inbatch_pos_score": 0.9526, "learning_rate": 1.5666666666666667e-05, "loss": 3.7667, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.7904, "num_token_overlap": 11.7153, "num_token_query": 31.4156, "num_token_union": 65.0834, "num_word_context": 202.6374, "num_word_doc": 49.8247, "num_word_query": 23.3481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3248.4924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3809, "query_norm": 1.3413, "queue_k_norm": 1.4586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4156, "sent_len_1": 66.7904, "sent_len_max_0": 127.5637, "sent_len_max_1": 190.7525, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.7492, "doc_norm": 1.4652, "encoder_q-embeddings": 1462.3102, "encoder_q-layer.0": 1008.6201, "encoder_q-layer.1": 1134.8485, "encoder_q-layer.10": 1294.0782, "encoder_q-layer.11": 3105.1301, "encoder_q-layer.2": 1348.2803, "encoder_q-layer.3": 1373.3348, "encoder_q-layer.4": 1544.2501, "encoder_q-layer.5": 1585.3943, "encoder_q-layer.6": 1508.1097, "encoder_q-layer.7": 1553.6865, "encoder_q-layer.8": 1522.541, "encoder_q-layer.9": 1287.4459, "epoch": 0.47, "inbatch_neg_score": 0.3759, "inbatch_pos_score": 0.9487, "learning_rate": 1.5611111111111113e-05, "loss": 3.7492, "norm_diff": 0.1232, "norm_loss": 0.0, "num_token_doc": 66.9257, "num_token_overlap": 11.6686, "num_token_query": 31.4823, "num_token_union": 65.2578, "num_word_context": 202.3235, "num_word_doc": 49.9266, "num_word_query": 23.3837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2405.7878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3777, "query_norm": 1.342, "queue_k_norm": 1.4605, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4823, "sent_len_1": 66.9257, "sent_len_max_0": 127.5962, "sent_len_max_1": 190.22, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.7666, "doc_norm": 1.4594, "encoder_q-embeddings": 1501.0798, "encoder_q-layer.0": 1005.8193, "encoder_q-layer.1": 1126.3918, "encoder_q-layer.10": 1344.4298, "encoder_q-layer.11": 3260.177, "encoder_q-layer.2": 1282.1715, "encoder_q-layer.3": 1339.9349, "encoder_q-layer.4": 1388.1459, "encoder_q-layer.5": 1377.6475, "encoder_q-layer.6": 1327.2847, "encoder_q-layer.7": 1276.806, "encoder_q-layer.8": 1400.5208, "encoder_q-layer.9": 1261.5605, "epoch": 0.47, "inbatch_neg_score": 0.3872, "inbatch_pos_score": 0.9253, "learning_rate": 1.5555555555555555e-05, "loss": 3.7666, "norm_diff": 0.1119, "norm_loss": 0.0, "num_token_doc": 66.7882, "num_token_overlap": 11.6767, "num_token_query": 31.4793, "num_token_union": 65.1862, "num_word_context": 202.6351, "num_word_doc": 49.8107, "num_word_query": 23.3995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2336.2857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3853, "query_norm": 1.3475, "queue_k_norm": 1.4602, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4793, "sent_len_1": 66.7882, "sent_len_max_0": 127.6462, "sent_len_max_1": 190.3663, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.733, "doc_norm": 1.4587, "encoder_q-embeddings": 1130.7058, "encoder_q-layer.0": 789.7761, "encoder_q-layer.1": 838.192, "encoder_q-layer.10": 1352.8271, "encoder_q-layer.11": 3178.1133, "encoder_q-layer.2": 936.9615, "encoder_q-layer.3": 971.7573, "encoder_q-layer.4": 993.6064, "encoder_q-layer.5": 1011.6237, "encoder_q-layer.6": 1142.4834, "encoder_q-layer.7": 1171.1696, "encoder_q-layer.8": 1247.1635, "encoder_q-layer.9": 1168.7445, "epoch": 0.47, "inbatch_neg_score": 0.3864, "inbatch_pos_score": 0.9287, "learning_rate": 1.55e-05, "loss": 3.733, "norm_diff": 0.1069, "norm_loss": 0.0, "num_token_doc": 66.8179, "num_token_overlap": 11.7025, "num_token_query": 31.3794, "num_token_union": 65.1117, "num_word_context": 202.1638, "num_word_doc": 49.846, "num_word_query": 23.2981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2079.6407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.387, "query_norm": 1.3518, "queue_k_norm": 1.4602, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3794, "sent_len_1": 66.8179, "sent_len_max_0": 127.455, "sent_len_max_1": 188.1587, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.7611, "doc_norm": 1.4607, "encoder_q-embeddings": 1075.5077, "encoder_q-layer.0": 702.293, "encoder_q-layer.1": 727.2552, "encoder_q-layer.10": 1371.6575, "encoder_q-layer.11": 3232.1533, "encoder_q-layer.2": 842.515, "encoder_q-layer.3": 854.3862, "encoder_q-layer.4": 913.9031, "encoder_q-layer.5": 944.8005, "encoder_q-layer.6": 1056.717, "encoder_q-layer.7": 1148.2213, "encoder_q-layer.8": 1291.1218, "encoder_q-layer.9": 1242.545, "epoch": 0.47, "inbatch_neg_score": 0.3875, "inbatch_pos_score": 0.9478, "learning_rate": 1.5444444444444446e-05, "loss": 3.7611, "norm_diff": 0.1137, "norm_loss": 0.0, "num_token_doc": 66.5599, "num_token_overlap": 11.6313, "num_token_query": 31.241, "num_token_union": 64.9634, "num_word_context": 202.1612, "num_word_doc": 49.6988, "num_word_query": 23.1995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2056.3643, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3875, "query_norm": 1.3469, "queue_k_norm": 1.461, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.241, "sent_len_1": 66.5599, "sent_len_max_0": 127.3675, "sent_len_max_1": 187.6925, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7612, "doc_norm": 1.4574, "encoder_q-embeddings": 4421.376, "encoder_q-layer.0": 3047.2458, "encoder_q-layer.1": 3420.9802, "encoder_q-layer.10": 1267.7736, "encoder_q-layer.11": 3094.2922, "encoder_q-layer.2": 3828.4587, "encoder_q-layer.3": 3941.0625, "encoder_q-layer.4": 4298.7559, "encoder_q-layer.5": 4179.5361, "encoder_q-layer.6": 3219.8953, "encoder_q-layer.7": 2495.7798, "encoder_q-layer.8": 1912.9302, "encoder_q-layer.9": 1346.4467, "epoch": 0.47, "inbatch_neg_score": 0.3907, "inbatch_pos_score": 0.9429, "learning_rate": 1.538888888888889e-05, "loss": 3.7612, "norm_diff": 0.1101, "norm_loss": 0.0, "num_token_doc": 66.5047, "num_token_overlap": 11.5647, "num_token_query": 31.1504, "num_token_union": 64.9288, "num_word_context": 202.0187, "num_word_doc": 49.6548, "num_word_query": 23.1116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5054.5908, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3901, "query_norm": 1.3473, "queue_k_norm": 1.4619, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1504, "sent_len_1": 66.5047, "sent_len_max_0": 127.2375, "sent_len_max_1": 187.9512, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7525, "doc_norm": 1.456, "encoder_q-embeddings": 1530.0795, "encoder_q-layer.0": 1099.7833, "encoder_q-layer.1": 1233.7255, "encoder_q-layer.10": 1357.3761, "encoder_q-layer.11": 3102.9924, "encoder_q-layer.2": 1499.6349, "encoder_q-layer.3": 1658.287, "encoder_q-layer.4": 1745.5195, "encoder_q-layer.5": 1844.2448, "encoder_q-layer.6": 1893.7296, "encoder_q-layer.7": 1795.7939, "encoder_q-layer.8": 1585.5823, "encoder_q-layer.9": 1300.7695, "epoch": 0.47, "inbatch_neg_score": 0.3931, "inbatch_pos_score": 0.9639, "learning_rate": 1.5333333333333334e-05, "loss": 3.7525, "norm_diff": 0.0884, "norm_loss": 0.0, "num_token_doc": 66.7541, "num_token_overlap": 11.7, "num_token_query": 31.4839, "num_token_union": 65.187, "num_word_context": 202.3582, "num_word_doc": 49.8393, "num_word_query": 23.4087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2592.1817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 1.3677, "queue_k_norm": 1.4612, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4839, "sent_len_1": 66.7541, "sent_len_max_0": 127.4287, "sent_len_max_1": 189.4725, "stdk": 0.0477, "stdq": 0.0429, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7481, "doc_norm": 1.46, "encoder_q-embeddings": 1391.9342, "encoder_q-layer.0": 1020.0787, "encoder_q-layer.1": 999.5254, "encoder_q-layer.10": 1346.1949, "encoder_q-layer.11": 3084.0867, "encoder_q-layer.2": 1158.0948, "encoder_q-layer.3": 1203.3136, "encoder_q-layer.4": 1155.6642, "encoder_q-layer.5": 1145.4597, "encoder_q-layer.6": 1259.462, "encoder_q-layer.7": 1243.6949, "encoder_q-layer.8": 1372.755, "encoder_q-layer.9": 1212.1301, "epoch": 0.47, "inbatch_neg_score": 0.3935, "inbatch_pos_score": 0.9487, "learning_rate": 1.527777777777778e-05, "loss": 3.7481, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 67.0052, "num_token_overlap": 11.7211, "num_token_query": 31.5418, "num_token_union": 65.3525, "num_word_context": 202.3477, "num_word_doc": 49.9774, "num_word_query": 23.4209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2219.2022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3918, "query_norm": 1.3526, "queue_k_norm": 1.4621, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5418, "sent_len_1": 67.0052, "sent_len_max_0": 127.51, "sent_len_max_1": 190.8613, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.7373, "doc_norm": 1.4648, "encoder_q-embeddings": 1419.996, "encoder_q-layer.0": 955.0559, "encoder_q-layer.1": 1111.1201, "encoder_q-layer.10": 1335.2452, "encoder_q-layer.11": 3238.103, "encoder_q-layer.2": 1267.957, "encoder_q-layer.3": 1391.0541, "encoder_q-layer.4": 1307.1313, "encoder_q-layer.5": 1300.1106, "encoder_q-layer.6": 1232.953, "encoder_q-layer.7": 1257.8208, "encoder_q-layer.8": 1359.1941, "encoder_q-layer.9": 1237.9041, "epoch": 0.47, "inbatch_neg_score": 0.3965, "inbatch_pos_score": 0.9521, "learning_rate": 1.5222222222222224e-05, "loss": 3.7373, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 66.5248, "num_token_overlap": 11.7141, "num_token_query": 31.4737, "num_token_union": 64.9994, "num_word_context": 202.0742, "num_word_doc": 49.6557, "num_word_query": 23.3727, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2292.9594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3967, "query_norm": 1.3544, "queue_k_norm": 1.4601, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4737, "sent_len_1": 66.5248, "sent_len_max_0": 127.5062, "sent_len_max_1": 187.3013, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7688, "doc_norm": 1.4556, "encoder_q-embeddings": 1090.9165, "encoder_q-layer.0": 728.5011, "encoder_q-layer.1": 787.9652, "encoder_q-layer.10": 1217.2416, "encoder_q-layer.11": 2981.874, "encoder_q-layer.2": 873.3572, "encoder_q-layer.3": 896.9537, "encoder_q-layer.4": 956.1658, "encoder_q-layer.5": 947.0004, "encoder_q-layer.6": 1085.4645, "encoder_q-layer.7": 1099.3915, "encoder_q-layer.8": 1295.5052, "encoder_q-layer.9": 1116.028, "epoch": 0.47, "inbatch_neg_score": 0.3988, "inbatch_pos_score": 0.9731, "learning_rate": 1.5166666666666668e-05, "loss": 3.7688, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 11.6544, "num_token_query": 31.3199, "num_token_union": 64.9955, "num_word_context": 201.763, "num_word_doc": 49.7035, "num_word_query": 23.2627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1992.2148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3989, "query_norm": 1.3559, "queue_k_norm": 1.4619, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3199, "sent_len_1": 66.6003, "sent_len_max_0": 127.3025, "sent_len_max_1": 189.1575, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7552, "doc_norm": 1.4599, "encoder_q-embeddings": 1185.9274, "encoder_q-layer.0": 795.2012, "encoder_q-layer.1": 861.5354, "encoder_q-layer.10": 1222.0164, "encoder_q-layer.11": 3192.5806, "encoder_q-layer.2": 933.1732, "encoder_q-layer.3": 1000.4019, "encoder_q-layer.4": 1034.7997, "encoder_q-layer.5": 1010.9132, "encoder_q-layer.6": 1124.7944, "encoder_q-layer.7": 1154.7593, "encoder_q-layer.8": 1301.7811, "encoder_q-layer.9": 1185.8921, "epoch": 0.47, "inbatch_neg_score": 0.4038, "inbatch_pos_score": 0.9678, "learning_rate": 1.5111111111111112e-05, "loss": 3.7552, "norm_diff": 0.0961, "norm_loss": 0.0, "num_token_doc": 66.7694, "num_token_overlap": 11.678, "num_token_query": 31.344, "num_token_union": 65.0856, "num_word_context": 202.4114, "num_word_doc": 49.791, "num_word_query": 23.2661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2127.3834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4033, "query_norm": 1.3638, "queue_k_norm": 1.4622, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.344, "sent_len_1": 66.7694, "sent_len_max_0": 127.3975, "sent_len_max_1": 190.0112, "stdk": 0.0477, "stdq": 0.0427, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.743, "doc_norm": 1.4606, "encoder_q-embeddings": 2688.2561, "encoder_q-layer.0": 1834.0536, "encoder_q-layer.1": 1957.2704, "encoder_q-layer.10": 2576.2268, "encoder_q-layer.11": 6370.1401, "encoder_q-layer.2": 2258.55, "encoder_q-layer.3": 2429.7495, "encoder_q-layer.4": 2680.9854, "encoder_q-layer.5": 2727.2729, "encoder_q-layer.6": 2742.0933, "encoder_q-layer.7": 2783.2764, "encoder_q-layer.8": 2679.1016, "encoder_q-layer.9": 2407.1384, "epoch": 0.47, "inbatch_neg_score": 0.4071, "inbatch_pos_score": 0.959, "learning_rate": 1.5055555555555556e-05, "loss": 3.743, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.8919, "num_token_overlap": 11.6813, "num_token_query": 31.3462, "num_token_union": 65.1971, "num_word_context": 202.3504, "num_word_doc": 49.9167, "num_word_query": 23.2408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4560.0677, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4065, "query_norm": 1.358, "queue_k_norm": 1.465, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3462, "sent_len_1": 66.8919, "sent_len_max_0": 127.4262, "sent_len_max_1": 189.395, "stdk": 0.0477, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7302, "doc_norm": 1.4703, "encoder_q-embeddings": 2496.45, "encoder_q-layer.0": 1629.5177, "encoder_q-layer.1": 1722.3938, "encoder_q-layer.10": 2550.3804, "encoder_q-layer.11": 6456.6479, "encoder_q-layer.2": 1861.2299, "encoder_q-layer.3": 1900.151, "encoder_q-layer.4": 2022.5676, "encoder_q-layer.5": 2028.5021, "encoder_q-layer.6": 2154.3535, "encoder_q-layer.7": 2435.7556, "encoder_q-layer.8": 2768.7698, "encoder_q-layer.9": 2525.7415, "epoch": 0.48, "inbatch_neg_score": 0.4085, "inbatch_pos_score": 0.9775, "learning_rate": 1.5e-05, "loss": 3.7302, "norm_diff": 0.1099, "norm_loss": 0.0, "num_token_doc": 66.9315, "num_token_overlap": 11.6771, "num_token_query": 31.2697, "num_token_union": 65.1488, "num_word_context": 202.4045, "num_word_doc": 49.9216, "num_word_query": 23.2111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4272.8184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4087, "query_norm": 1.3604, "queue_k_norm": 1.4653, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2697, "sent_len_1": 66.9315, "sent_len_max_0": 127.6338, "sent_len_max_1": 192.0075, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7516, "doc_norm": 1.4655, "encoder_q-embeddings": 2930.5352, "encoder_q-layer.0": 1969.0269, "encoder_q-layer.1": 2180.6951, "encoder_q-layer.10": 2747.0088, "encoder_q-layer.11": 6520.7642, "encoder_q-layer.2": 2481.3613, "encoder_q-layer.3": 2667.5706, "encoder_q-layer.4": 2966.9795, "encoder_q-layer.5": 2672.3743, "encoder_q-layer.6": 2788.8286, "encoder_q-layer.7": 2965.1414, "encoder_q-layer.8": 3201.3132, "encoder_q-layer.9": 2679.7051, "epoch": 0.48, "inbatch_neg_score": 0.4113, "inbatch_pos_score": 0.9814, "learning_rate": 1.4944444444444444e-05, "loss": 3.7516, "norm_diff": 0.0986, "norm_loss": 0.0, "num_token_doc": 67.0144, "num_token_overlap": 11.6539, "num_token_query": 31.3483, "num_token_union": 65.2516, "num_word_context": 202.4178, "num_word_doc": 49.9936, "num_word_query": 23.2949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4839.7656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4106, "query_norm": 1.3669, "queue_k_norm": 1.4672, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3483, "sent_len_1": 67.0144, "sent_len_max_0": 127.54, "sent_len_max_1": 189.4375, "stdk": 0.0479, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7479, "doc_norm": 1.4675, "encoder_q-embeddings": 4732.8262, "encoder_q-layer.0": 3675.6333, "encoder_q-layer.1": 4282.5269, "encoder_q-layer.10": 2661.9392, "encoder_q-layer.11": 6591.1865, "encoder_q-layer.2": 5153.1719, "encoder_q-layer.3": 4748.938, "encoder_q-layer.4": 5735.8403, "encoder_q-layer.5": 4259.0435, "encoder_q-layer.6": 4216.2168, "encoder_q-layer.7": 3905.2739, "encoder_q-layer.8": 2809.5789, "encoder_q-layer.9": 2379.2415, "epoch": 0.48, "inbatch_neg_score": 0.4132, "inbatch_pos_score": 0.9761, "learning_rate": 1.4888888888888888e-05, "loss": 3.7479, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.6964, "num_token_overlap": 11.7063, "num_token_query": 31.361, "num_token_union": 65.0257, "num_word_context": 201.8488, "num_word_doc": 49.7858, "num_word_query": 23.2999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6681.6131, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4126, "query_norm": 1.3649, "queue_k_norm": 1.4666, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.361, "sent_len_1": 66.6964, "sent_len_max_0": 127.5487, "sent_len_max_1": 189.0775, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7678, "doc_norm": 1.4647, "encoder_q-embeddings": 2302.1912, "encoder_q-layer.0": 1561.6368, "encoder_q-layer.1": 1690.4749, "encoder_q-layer.10": 2525.113, "encoder_q-layer.11": 6487.2578, "encoder_q-layer.2": 1925.533, "encoder_q-layer.3": 2050.5527, "encoder_q-layer.4": 2234.0679, "encoder_q-layer.5": 2392.2881, "encoder_q-layer.6": 2536.9202, "encoder_q-layer.7": 2834.9629, "encoder_q-layer.8": 2974.158, "encoder_q-layer.9": 2526.9192, "epoch": 0.48, "inbatch_neg_score": 0.4122, "inbatch_pos_score": 0.9697, "learning_rate": 1.4833333333333336e-05, "loss": 3.7678, "norm_diff": 0.1087, "norm_loss": 0.0, "num_token_doc": 66.8402, "num_token_overlap": 11.694, "num_token_query": 31.4852, "num_token_union": 65.2376, "num_word_context": 202.5478, "num_word_doc": 49.8716, "num_word_query": 23.3736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4398.7061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4116, "query_norm": 1.3559, "queue_k_norm": 1.4665, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4852, "sent_len_1": 66.8402, "sent_len_max_0": 127.3762, "sent_len_max_1": 189.6312, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7593, "doc_norm": 1.4721, "encoder_q-embeddings": 2178.3086, "encoder_q-layer.0": 1412.3237, "encoder_q-layer.1": 1523.4294, "encoder_q-layer.10": 2432.572, "encoder_q-layer.11": 6125.3916, "encoder_q-layer.2": 1722.9097, "encoder_q-layer.3": 1774.4996, "encoder_q-layer.4": 1856.6417, "encoder_q-layer.5": 1828.6134, "encoder_q-layer.6": 1912.0295, "encoder_q-layer.7": 2115.7212, "encoder_q-layer.8": 2303.97, "encoder_q-layer.9": 2263.927, "epoch": 0.48, "inbatch_neg_score": 0.4141, "inbatch_pos_score": 0.9961, "learning_rate": 1.477777777777778e-05, "loss": 3.7593, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.6288, "num_token_overlap": 11.6155, "num_token_query": 31.2408, "num_token_union": 65.0299, "num_word_context": 201.8951, "num_word_doc": 49.7739, "num_word_query": 23.2132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3958.0037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.3501, "queue_k_norm": 1.4682, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2408, "sent_len_1": 66.6288, "sent_len_max_0": 127.2963, "sent_len_max_1": 187.0575, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7606, "doc_norm": 1.4696, "encoder_q-embeddings": 1434.7107, "encoder_q-layer.0": 990.538, "encoder_q-layer.1": 1053.2567, "encoder_q-layer.10": 1274.9509, "encoder_q-layer.11": 3111.6907, "encoder_q-layer.2": 1147.766, "encoder_q-layer.3": 1199.7407, "encoder_q-layer.4": 1207.3184, "encoder_q-layer.5": 1294.0553, "encoder_q-layer.6": 1469.3701, "encoder_q-layer.7": 1575.3236, "encoder_q-layer.8": 1579.6545, "encoder_q-layer.9": 1316.9769, "epoch": 0.48, "inbatch_neg_score": 0.4169, "inbatch_pos_score": 0.9751, "learning_rate": 1.4722222222222224e-05, "loss": 3.7606, "norm_diff": 0.123, "norm_loss": 0.0, "num_token_doc": 66.8932, "num_token_overlap": 11.6879, "num_token_query": 31.3564, "num_token_union": 65.1728, "num_word_context": 202.3363, "num_word_doc": 49.8835, "num_word_query": 23.294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2337.5001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4167, "query_norm": 1.3466, "queue_k_norm": 1.4692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3564, "sent_len_1": 66.8932, "sent_len_max_0": 127.6325, "sent_len_max_1": 189.25, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.7414, "doc_norm": 1.4732, "encoder_q-embeddings": 1256.2498, "encoder_q-layer.0": 851.0891, "encoder_q-layer.1": 878.7064, "encoder_q-layer.10": 1182.7383, "encoder_q-layer.11": 3176.8818, "encoder_q-layer.2": 980.9913, "encoder_q-layer.3": 1014.9814, "encoder_q-layer.4": 1036.3865, "encoder_q-layer.5": 1026.6783, "encoder_q-layer.6": 1112.495, "encoder_q-layer.7": 1201.0936, "encoder_q-layer.8": 1309.2961, "encoder_q-layer.9": 1207.3503, "epoch": 0.48, "inbatch_neg_score": 0.4202, "inbatch_pos_score": 0.9683, "learning_rate": 1.4666666666666668e-05, "loss": 3.7414, "norm_diff": 0.118, "norm_loss": 0.0, "num_token_doc": 66.4796, "num_token_overlap": 11.6663, "num_token_query": 31.3131, "num_token_union": 64.9353, "num_word_context": 202.0043, "num_word_doc": 49.5856, "num_word_query": 23.2476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2135.9678, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4199, "query_norm": 1.3553, "queue_k_norm": 1.4698, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3131, "sent_len_1": 66.4796, "sent_len_max_0": 127.5325, "sent_len_max_1": 188.5875, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7611, "doc_norm": 1.4663, "encoder_q-embeddings": 1379.4819, "encoder_q-layer.0": 954.7391, "encoder_q-layer.1": 1111.3354, "encoder_q-layer.10": 1242.2366, "encoder_q-layer.11": 3146.417, "encoder_q-layer.2": 1269.4463, "encoder_q-layer.3": 1365.9915, "encoder_q-layer.4": 1496.6855, "encoder_q-layer.5": 1585.3059, "encoder_q-layer.6": 1598.6117, "encoder_q-layer.7": 1463.0105, "encoder_q-layer.8": 1386.3718, "encoder_q-layer.9": 1184.5999, "epoch": 0.48, "inbatch_neg_score": 0.4171, "inbatch_pos_score": 0.9888, "learning_rate": 1.4611111111111112e-05, "loss": 3.7611, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.516, "num_token_overlap": 11.6461, "num_token_query": 31.3572, "num_token_union": 64.9967, "num_word_context": 202.1283, "num_word_doc": 49.6424, "num_word_query": 23.3046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2371.8333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4167, "query_norm": 1.35, "queue_k_norm": 1.4688, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3572, "sent_len_1": 66.516, "sent_len_max_0": 127.4475, "sent_len_max_1": 189.8475, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7495, "doc_norm": 1.4753, "encoder_q-embeddings": 1483.918, "encoder_q-layer.0": 1046.689, "encoder_q-layer.1": 1186.9828, "encoder_q-layer.10": 1268.7936, "encoder_q-layer.11": 3258.9609, "encoder_q-layer.2": 1323.5945, "encoder_q-layer.3": 1374.6702, "encoder_q-layer.4": 1533.0745, "encoder_q-layer.5": 1381.6982, "encoder_q-layer.6": 1351.7203, "encoder_q-layer.7": 1306.5085, "encoder_q-layer.8": 1326.2018, "encoder_q-layer.9": 1211.837, "epoch": 0.48, "inbatch_neg_score": 0.4176, "inbatch_pos_score": 0.9863, "learning_rate": 1.4555555555555556e-05, "loss": 3.7495, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 67.0688, "num_token_overlap": 11.7033, "num_token_query": 31.3799, "num_token_union": 65.2698, "num_word_context": 202.6775, "num_word_doc": 50.0656, "num_word_query": 23.2929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2412.0346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4175, "query_norm": 1.3554, "queue_k_norm": 1.4698, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3799, "sent_len_1": 67.0688, "sent_len_max_0": 127.3525, "sent_len_max_1": 188.4275, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7588, "doc_norm": 1.4666, "encoder_q-embeddings": 1486.7349, "encoder_q-layer.0": 1051.5588, "encoder_q-layer.1": 1129.5247, "encoder_q-layer.10": 1433.8282, "encoder_q-layer.11": 3000.1284, "encoder_q-layer.2": 1267.0842, "encoder_q-layer.3": 1286.8854, "encoder_q-layer.4": 1223.6812, "encoder_q-layer.5": 1117.7245, "encoder_q-layer.6": 1211.2223, "encoder_q-layer.7": 1126.624, "encoder_q-layer.8": 1327.1832, "encoder_q-layer.9": 1207.0212, "epoch": 0.48, "inbatch_neg_score": 0.4167, "inbatch_pos_score": 0.9766, "learning_rate": 1.45e-05, "loss": 3.7588, "norm_diff": 0.1328, "norm_loss": 0.0, "num_token_doc": 66.8818, "num_token_overlap": 11.6917, "num_token_query": 31.3472, "num_token_union": 65.1298, "num_word_context": 202.3633, "num_word_doc": 49.892, "num_word_query": 23.2729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2215.1855, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.417, "query_norm": 1.3338, "queue_k_norm": 1.4699, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3472, "sent_len_1": 66.8818, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.1163, "stdk": 0.0478, "stdq": 0.0414, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.7228, "doc_norm": 1.4691, "encoder_q-embeddings": 1227.6672, "encoder_q-layer.0": 834.2817, "encoder_q-layer.1": 924.4675, "encoder_q-layer.10": 1367.6387, "encoder_q-layer.11": 3228.4602, "encoder_q-layer.2": 1103.2727, "encoder_q-layer.3": 1138.3727, "encoder_q-layer.4": 1173.4734, "encoder_q-layer.5": 1212.3965, "encoder_q-layer.6": 1412.2938, "encoder_q-layer.7": 1322.3967, "encoder_q-layer.8": 1395.2946, "encoder_q-layer.9": 1314.0935, "epoch": 0.48, "inbatch_neg_score": 0.4189, "inbatch_pos_score": 0.9683, "learning_rate": 1.4444444444444444e-05, "loss": 3.7228, "norm_diff": 0.1063, "norm_loss": 0.0, "num_token_doc": 66.6704, "num_token_overlap": 11.665, "num_token_query": 31.268, "num_token_union": 64.9806, "num_word_context": 202.1613, "num_word_doc": 49.7812, "num_word_query": 23.2314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2238.7251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4194, "query_norm": 1.3628, "queue_k_norm": 1.47, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.268, "sent_len_1": 66.6704, "sent_len_max_0": 127.5613, "sent_len_max_1": 188.86, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7306, "doc_norm": 1.4737, "encoder_q-embeddings": 1100.729, "encoder_q-layer.0": 767.6661, "encoder_q-layer.1": 776.2765, "encoder_q-layer.10": 1248.6071, "encoder_q-layer.11": 3045.6853, "encoder_q-layer.2": 822.9109, "encoder_q-layer.3": 834.7603, "encoder_q-layer.4": 867.6672, "encoder_q-layer.5": 899.1653, "encoder_q-layer.6": 955.0208, "encoder_q-layer.7": 1095.7258, "encoder_q-layer.8": 1144.0088, "encoder_q-layer.9": 1113.7019, "epoch": 0.48, "inbatch_neg_score": 0.4224, "inbatch_pos_score": 0.9814, "learning_rate": 1.438888888888889e-05, "loss": 3.7306, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.9597, "num_token_overlap": 11.7162, "num_token_query": 31.3345, "num_token_union": 65.1385, "num_word_context": 202.1567, "num_word_doc": 49.9181, "num_word_query": 23.2928, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1977.0443, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4233, "query_norm": 1.3624, "queue_k_norm": 1.4712, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3345, "sent_len_1": 66.9597, "sent_len_max_0": 127.4338, "sent_len_max_1": 192.5712, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7392, "doc_norm": 1.4725, "encoder_q-embeddings": 1287.7273, "encoder_q-layer.0": 882.6285, "encoder_q-layer.1": 985.2141, "encoder_q-layer.10": 1228.8066, "encoder_q-layer.11": 3064.3093, "encoder_q-layer.2": 1155.4321, "encoder_q-layer.3": 1151.1293, "encoder_q-layer.4": 1207.8228, "encoder_q-layer.5": 1176.9611, "encoder_q-layer.6": 1215.2148, "encoder_q-layer.7": 1233.7301, "encoder_q-layer.8": 1298.4958, "encoder_q-layer.9": 1152.8652, "epoch": 0.48, "inbatch_neg_score": 0.4224, "inbatch_pos_score": 0.9868, "learning_rate": 1.4333333333333334e-05, "loss": 3.7392, "norm_diff": 0.1156, "norm_loss": 0.0, "num_token_doc": 66.9567, "num_token_overlap": 11.7448, "num_token_query": 31.4389, "num_token_union": 65.2005, "num_word_context": 202.8505, "num_word_doc": 49.8951, "num_word_query": 23.3504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2178.8538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4221, "query_norm": 1.357, "queue_k_norm": 1.4733, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4389, "sent_len_1": 66.9567, "sent_len_max_0": 127.4075, "sent_len_max_1": 192.2763, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7358, "doc_norm": 1.4766, "encoder_q-embeddings": 2769.9578, "encoder_q-layer.0": 1933.6552, "encoder_q-layer.1": 2075.7114, "encoder_q-layer.10": 1393.577, "encoder_q-layer.11": 3289.5947, "encoder_q-layer.2": 2319.1741, "encoder_q-layer.3": 2633.7017, "encoder_q-layer.4": 2835.3191, "encoder_q-layer.5": 3306.3757, "encoder_q-layer.6": 3207.5237, "encoder_q-layer.7": 2554.4167, "encoder_q-layer.8": 1981.384, "encoder_q-layer.9": 1386.9517, "epoch": 0.48, "inbatch_neg_score": 0.4226, "inbatch_pos_score": 1.0, "learning_rate": 1.427777777777778e-05, "loss": 3.7358, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.8067, "num_token_overlap": 11.7143, "num_token_query": 31.4473, "num_token_union": 65.1779, "num_word_context": 202.4495, "num_word_doc": 49.8751, "num_word_query": 23.3534, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3788.3421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4221, "query_norm": 1.3695, "queue_k_norm": 1.4718, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4473, "sent_len_1": 66.8067, "sent_len_max_0": 127.6262, "sent_len_max_1": 188.855, "stdk": 0.0481, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7445, "doc_norm": 1.4681, "encoder_q-embeddings": 1184.2542, "encoder_q-layer.0": 819.6713, "encoder_q-layer.1": 878.5879, "encoder_q-layer.10": 1206.6477, "encoder_q-layer.11": 3193.6323, "encoder_q-layer.2": 962.9526, "encoder_q-layer.3": 970.881, "encoder_q-layer.4": 983.9339, "encoder_q-layer.5": 978.4279, "encoder_q-layer.6": 1033.5316, "encoder_q-layer.7": 1136.769, "encoder_q-layer.8": 1244.4609, "encoder_q-layer.9": 1142.353, "epoch": 0.48, "inbatch_neg_score": 0.4243, "inbatch_pos_score": 0.9751, "learning_rate": 1.4222222222222224e-05, "loss": 3.7445, "norm_diff": 0.1186, "norm_loss": 0.0, "num_token_doc": 66.8689, "num_token_overlap": 11.697, "num_token_query": 31.4218, "num_token_union": 65.1676, "num_word_context": 202.4347, "num_word_doc": 49.8957, "num_word_query": 23.3532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2118.5275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.425, "query_norm": 1.3495, "queue_k_norm": 1.4737, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4218, "sent_len_1": 66.8689, "sent_len_max_0": 127.305, "sent_len_max_1": 190.7088, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.7454, "doc_norm": 1.4738, "encoder_q-embeddings": 1256.4447, "encoder_q-layer.0": 822.0399, "encoder_q-layer.1": 931.4583, "encoder_q-layer.10": 1153.8094, "encoder_q-layer.11": 3035.0676, "encoder_q-layer.2": 1048.2885, "encoder_q-layer.3": 1088.1519, "encoder_q-layer.4": 1039.6068, "encoder_q-layer.5": 1051.3613, "encoder_q-layer.6": 1147.6766, "encoder_q-layer.7": 1174.5684, "encoder_q-layer.8": 1272.7979, "encoder_q-layer.9": 1154.9398, "epoch": 0.48, "inbatch_neg_score": 0.4268, "inbatch_pos_score": 1.0039, "learning_rate": 1.4166666666666668e-05, "loss": 3.7454, "norm_diff": 0.1108, "norm_loss": 0.0, "num_token_doc": 66.4953, "num_token_overlap": 11.7175, "num_token_query": 31.5105, "num_token_union": 64.9997, "num_word_context": 202.0488, "num_word_doc": 49.6048, "num_word_query": 23.4209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.0491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4263, "query_norm": 1.3631, "queue_k_norm": 1.4713, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5105, "sent_len_1": 66.4953, "sent_len_max_0": 127.5725, "sent_len_max_1": 190.6925, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.739, "doc_norm": 1.4741, "encoder_q-embeddings": 1126.302, "encoder_q-layer.0": 756.1512, "encoder_q-layer.1": 820.1757, "encoder_q-layer.10": 1294.365, "encoder_q-layer.11": 3217.8284, "encoder_q-layer.2": 967.5854, "encoder_q-layer.3": 981.6117, "encoder_q-layer.4": 1008.2323, "encoder_q-layer.5": 994.0373, "encoder_q-layer.6": 1037.3842, "encoder_q-layer.7": 1153.3716, "encoder_q-layer.8": 1386.0615, "encoder_q-layer.9": 1204.0742, "epoch": 0.49, "inbatch_neg_score": 0.4281, "inbatch_pos_score": 0.9966, "learning_rate": 1.4111111111111112e-05, "loss": 3.739, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.6632, "num_token_overlap": 11.6524, "num_token_query": 31.3477, "num_token_union": 65.0624, "num_word_context": 202.3039, "num_word_doc": 49.7554, "num_word_query": 23.2975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2104.3251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4277, "query_norm": 1.356, "queue_k_norm": 1.4732, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3477, "sent_len_1": 66.6632, "sent_len_max_0": 127.4463, "sent_len_max_1": 190.2463, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7639, "doc_norm": 1.4769, "encoder_q-embeddings": 991.593, "encoder_q-layer.0": 682.4005, "encoder_q-layer.1": 714.5449, "encoder_q-layer.10": 1206.4685, "encoder_q-layer.11": 3047.2417, "encoder_q-layer.2": 790.9348, "encoder_q-layer.3": 818.4279, "encoder_q-layer.4": 864.5013, "encoder_q-layer.5": 880.1308, "encoder_q-layer.6": 968.2776, "encoder_q-layer.7": 1067.5732, "encoder_q-layer.8": 1240.2419, "encoder_q-layer.9": 1137.1558, "epoch": 0.49, "inbatch_neg_score": 0.4241, "inbatch_pos_score": 0.9976, "learning_rate": 1.4055555555555556e-05, "loss": 3.7639, "norm_diff": 0.1104, "norm_loss": 0.0, "num_token_doc": 67.0863, "num_token_overlap": 11.6738, "num_token_query": 31.367, "num_token_union": 65.2723, "num_word_context": 202.5988, "num_word_doc": 50.0468, "num_word_query": 23.2649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1939.5948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4253, "query_norm": 1.3665, "queue_k_norm": 1.4749, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.367, "sent_len_1": 67.0863, "sent_len_max_0": 127.485, "sent_len_max_1": 190.1813, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.7437, "doc_norm": 1.4809, "encoder_q-embeddings": 1075.6226, "encoder_q-layer.0": 702.4779, "encoder_q-layer.1": 751.7612, "encoder_q-layer.10": 1174.3596, "encoder_q-layer.11": 3021.4028, "encoder_q-layer.2": 841.8952, "encoder_q-layer.3": 878.8064, "encoder_q-layer.4": 921.7238, "encoder_q-layer.5": 933.8536, "encoder_q-layer.6": 1005.2966, "encoder_q-layer.7": 1065.8431, "encoder_q-layer.8": 1214.4771, "encoder_q-layer.9": 1121.907, "epoch": 0.49, "inbatch_neg_score": 0.4281, "inbatch_pos_score": 1.0342, "learning_rate": 1.4000000000000001e-05, "loss": 3.7437, "norm_diff": 0.1062, "norm_loss": 0.0, "num_token_doc": 66.6579, "num_token_overlap": 11.6756, "num_token_query": 31.4033, "num_token_union": 65.0916, "num_word_context": 202.0324, "num_word_doc": 49.7668, "num_word_query": 23.3279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1969.5683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4282, "query_norm": 1.3748, "queue_k_norm": 1.4754, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4033, "sent_len_1": 66.6579, "sent_len_max_0": 127.3937, "sent_len_max_1": 189.035, "stdk": 0.0482, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7424, "doc_norm": 1.4721, "encoder_q-embeddings": 1011.8402, "encoder_q-layer.0": 665.2238, "encoder_q-layer.1": 697.804, "encoder_q-layer.10": 1472.5688, "encoder_q-layer.11": 3238.6292, "encoder_q-layer.2": 797.8813, "encoder_q-layer.3": 805.6939, "encoder_q-layer.4": 807.2686, "encoder_q-layer.5": 809.6445, "encoder_q-layer.6": 943.525, "encoder_q-layer.7": 1050.0076, "encoder_q-layer.8": 1319.7871, "encoder_q-layer.9": 1235.2716, "epoch": 0.49, "inbatch_neg_score": 0.4304, "inbatch_pos_score": 0.9912, "learning_rate": 1.3944444444444446e-05, "loss": 3.7424, "norm_diff": 0.111, "norm_loss": 0.0, "num_token_doc": 66.7285, "num_token_overlap": 11.6749, "num_token_query": 31.351, "num_token_union": 65.0904, "num_word_context": 202.5654, "num_word_doc": 49.8021, "num_word_query": 23.2657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2022.6831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.429, "query_norm": 1.3611, "queue_k_norm": 1.475, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.351, "sent_len_1": 66.7285, "sent_len_max_0": 127.3312, "sent_len_max_1": 188.3663, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7292, "doc_norm": 1.4766, "encoder_q-embeddings": 1146.1189, "encoder_q-layer.0": 748.2019, "encoder_q-layer.1": 834.0121, "encoder_q-layer.10": 1224.4637, "encoder_q-layer.11": 3036.2747, "encoder_q-layer.2": 949.3791, "encoder_q-layer.3": 962.7862, "encoder_q-layer.4": 999.726, "encoder_q-layer.5": 1026.4675, "encoder_q-layer.6": 1074.4088, "encoder_q-layer.7": 1177.3555, "encoder_q-layer.8": 1284.6914, "encoder_q-layer.9": 1130.5046, "epoch": 0.49, "inbatch_neg_score": 0.4246, "inbatch_pos_score": 0.9834, "learning_rate": 1.388888888888889e-05, "loss": 3.7292, "norm_diff": 0.1154, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 11.7085, "num_token_query": 31.4449, "num_token_union": 65.071, "num_word_context": 202.0019, "num_word_doc": 49.7458, "num_word_query": 23.3482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.3825, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4248, "query_norm": 1.3611, "queue_k_norm": 1.4762, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4449, "sent_len_1": 66.6933, "sent_len_max_0": 127.4762, "sent_len_max_1": 190.6887, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.7298, "doc_norm": 1.4717, "encoder_q-embeddings": 1088.5443, "encoder_q-layer.0": 742.1802, "encoder_q-layer.1": 807.2396, "encoder_q-layer.10": 1378.4978, "encoder_q-layer.11": 3344.0833, "encoder_q-layer.2": 910.0562, "encoder_q-layer.3": 951.6712, "encoder_q-layer.4": 1005.8549, "encoder_q-layer.5": 1011.0688, "encoder_q-layer.6": 1063.1552, "encoder_q-layer.7": 1164.2344, "encoder_q-layer.8": 1331.7832, "encoder_q-layer.9": 1258.1145, "epoch": 0.49, "inbatch_neg_score": 0.4222, "inbatch_pos_score": 0.9819, "learning_rate": 1.3833333333333334e-05, "loss": 3.7298, "norm_diff": 0.099, "norm_loss": 0.0, "num_token_doc": 66.8537, "num_token_overlap": 11.735, "num_token_query": 31.6513, "num_token_union": 65.342, "num_word_context": 202.358, "num_word_doc": 49.9121, "num_word_query": 23.5218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.0604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4216, "query_norm": 1.3727, "queue_k_norm": 1.4743, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.6513, "sent_len_1": 66.8537, "sent_len_max_0": 127.6287, "sent_len_max_1": 187.8363, "stdk": 0.0478, "stdq": 0.043, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7418, "doc_norm": 1.4788, "encoder_q-embeddings": 1170.5521, "encoder_q-layer.0": 795.2938, "encoder_q-layer.1": 871.4872, "encoder_q-layer.10": 1239.4976, "encoder_q-layer.11": 3014.3762, "encoder_q-layer.2": 987.2349, "encoder_q-layer.3": 1042.0836, "encoder_q-layer.4": 1050.4791, "encoder_q-layer.5": 1027.8513, "encoder_q-layer.6": 1134.6274, "encoder_q-layer.7": 1157.0192, "encoder_q-layer.8": 1262.7506, "encoder_q-layer.9": 1108.0839, "epoch": 0.49, "inbatch_neg_score": 0.4213, "inbatch_pos_score": 0.9941, "learning_rate": 1.3777777777777778e-05, "loss": 3.7418, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.6098, "num_token_overlap": 11.7133, "num_token_query": 31.5253, "num_token_union": 65.0895, "num_word_context": 202.1946, "num_word_doc": 49.7033, "num_word_query": 23.4181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4214, "query_norm": 1.358, "queue_k_norm": 1.4747, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5253, "sent_len_1": 66.6098, "sent_len_max_0": 127.5537, "sent_len_max_1": 188.765, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7397, "doc_norm": 1.4796, "encoder_q-embeddings": 1502.8263, "encoder_q-layer.0": 1052.9893, "encoder_q-layer.1": 1159.7225, "encoder_q-layer.10": 1206.3036, "encoder_q-layer.11": 3095.708, "encoder_q-layer.2": 1371.1235, "encoder_q-layer.3": 1538.1454, "encoder_q-layer.4": 1625.5901, "encoder_q-layer.5": 1742.5045, "encoder_q-layer.6": 1912.0626, "encoder_q-layer.7": 1800.3501, "encoder_q-layer.8": 1467.5012, "encoder_q-layer.9": 1151.0297, "epoch": 0.49, "inbatch_neg_score": 0.4281, "inbatch_pos_score": 0.9946, "learning_rate": 1.3722222222222222e-05, "loss": 3.7397, "norm_diff": 0.1234, "norm_loss": 0.0, "num_token_doc": 67.0674, "num_token_overlap": 11.7047, "num_token_query": 31.4504, "num_token_union": 65.3184, "num_word_context": 202.6855, "num_word_doc": 50.0486, "num_word_query": 23.3548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2541.458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4265, "query_norm": 1.3561, "queue_k_norm": 1.4781, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4504, "sent_len_1": 67.0674, "sent_len_max_0": 127.5875, "sent_len_max_1": 189.7337, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.7698, "doc_norm": 1.4759, "encoder_q-embeddings": 1457.5446, "encoder_q-layer.0": 1005.4572, "encoder_q-layer.1": 1098.4312, "encoder_q-layer.10": 1225.703, "encoder_q-layer.11": 3102.1458, "encoder_q-layer.2": 1320.4596, "encoder_q-layer.3": 1297.946, "encoder_q-layer.4": 1379.8669, "encoder_q-layer.5": 1295.3207, "encoder_q-layer.6": 1172.1705, "encoder_q-layer.7": 1179.1359, "encoder_q-layer.8": 1250.0265, "encoder_q-layer.9": 1164.6926, "epoch": 0.49, "inbatch_neg_score": 0.4295, "inbatch_pos_score": 0.9688, "learning_rate": 1.3666666666666666e-05, "loss": 3.7698, "norm_diff": 0.1196, "norm_loss": 0.0, "num_token_doc": 66.6189, "num_token_overlap": 11.6284, "num_token_query": 31.4232, "num_token_union": 65.1294, "num_word_context": 202.2306, "num_word_doc": 49.7448, "num_word_query": 23.3187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2276.4622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4287, "query_norm": 1.3563, "queue_k_norm": 1.4751, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4232, "sent_len_1": 66.6189, "sent_len_max_0": 127.4463, "sent_len_max_1": 187.7088, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.752, "doc_norm": 1.4786, "encoder_q-embeddings": 2683.2036, "encoder_q-layer.0": 1855.0714, "encoder_q-layer.1": 1997.5413, "encoder_q-layer.10": 2520.2715, "encoder_q-layer.11": 6306.5669, "encoder_q-layer.2": 2221.2419, "encoder_q-layer.3": 2298.7554, "encoder_q-layer.4": 2447.0054, "encoder_q-layer.5": 2623.9202, "encoder_q-layer.6": 2570.905, "encoder_q-layer.7": 2645.5769, "encoder_q-layer.8": 2774.8933, "encoder_q-layer.9": 2335.2781, "epoch": 0.49, "inbatch_neg_score": 0.4281, "inbatch_pos_score": 0.9976, "learning_rate": 1.3611111111111111e-05, "loss": 3.752, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.7357, "num_token_overlap": 11.6592, "num_token_query": 31.3216, "num_token_union": 65.0693, "num_word_context": 202.2055, "num_word_doc": 49.8064, "num_word_query": 23.2535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4533.2, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4277, "query_norm": 1.3591, "queue_k_norm": 1.4756, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3216, "sent_len_1": 66.7357, "sent_len_max_0": 127.5088, "sent_len_max_1": 188.9563, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.7328, "doc_norm": 1.4801, "encoder_q-embeddings": 2082.363, "encoder_q-layer.0": 1405.9631, "encoder_q-layer.1": 1416.64, "encoder_q-layer.10": 2622.7979, "encoder_q-layer.11": 6573.3467, "encoder_q-layer.2": 1601.4062, "encoder_q-layer.3": 1683.012, "encoder_q-layer.4": 1742.2701, "encoder_q-layer.5": 1765.3445, "encoder_q-layer.6": 1977.9553, "encoder_q-layer.7": 2201.5349, "encoder_q-layer.8": 2678.4802, "encoder_q-layer.9": 2468.2195, "epoch": 0.49, "inbatch_neg_score": 0.4279, "inbatch_pos_score": 0.9927, "learning_rate": 1.3555555555555557e-05, "loss": 3.7328, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 66.749, "num_token_overlap": 11.6087, "num_token_query": 31.1945, "num_token_union": 65.0164, "num_word_context": 202.3334, "num_word_doc": 49.8224, "num_word_query": 23.1359, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4077.3214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4282, "query_norm": 1.3736, "queue_k_norm": 1.4741, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1945, "sent_len_1": 66.749, "sent_len_max_0": 127.2537, "sent_len_max_1": 187.2413, "stdk": 0.0481, "stdq": 0.0429, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.7472, "doc_norm": 1.4759, "encoder_q-embeddings": 2431.9172, "encoder_q-layer.0": 1671.2362, "encoder_q-layer.1": 1739.6411, "encoder_q-layer.10": 2701.6858, "encoder_q-layer.11": 6273.5239, "encoder_q-layer.2": 1995.1067, "encoder_q-layer.3": 2026.0082, "encoder_q-layer.4": 2182.5745, "encoder_q-layer.5": 2145.6504, "encoder_q-layer.6": 2199.4214, "encoder_q-layer.7": 2169.8962, "encoder_q-layer.8": 2459.9719, "encoder_q-layer.9": 2309.0093, "epoch": 0.49, "inbatch_neg_score": 0.4322, "inbatch_pos_score": 0.9756, "learning_rate": 1.3500000000000001e-05, "loss": 3.7472, "norm_diff": 0.1124, "norm_loss": 0.0, "num_token_doc": 66.8022, "num_token_overlap": 11.6801, "num_token_query": 31.5048, "num_token_union": 65.1923, "num_word_context": 202.3474, "num_word_doc": 49.8204, "num_word_query": 23.3963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4231.036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4321, "query_norm": 1.3635, "queue_k_norm": 1.4771, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5048, "sent_len_1": 66.8022, "sent_len_max_0": 127.5225, "sent_len_max_1": 189.4925, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7326, "doc_norm": 1.4758, "encoder_q-embeddings": 2429.843, "encoder_q-layer.0": 1727.922, "encoder_q-layer.1": 1864.8298, "encoder_q-layer.10": 2524.5447, "encoder_q-layer.11": 6021.4858, "encoder_q-layer.2": 2032.8391, "encoder_q-layer.3": 2119.0342, "encoder_q-layer.4": 2130.1465, "encoder_q-layer.5": 2147.0056, "encoder_q-layer.6": 2287.2761, "encoder_q-layer.7": 2279.0137, "encoder_q-layer.8": 2690.0889, "encoder_q-layer.9": 2375.8379, "epoch": 0.49, "inbatch_neg_score": 0.4325, "inbatch_pos_score": 0.9883, "learning_rate": 1.3444444444444445e-05, "loss": 3.7326, "norm_diff": 0.1107, "norm_loss": 0.0, "num_token_doc": 66.8592, "num_token_overlap": 11.6433, "num_token_query": 31.3262, "num_token_union": 65.2203, "num_word_context": 202.4709, "num_word_doc": 49.927, "num_word_query": 23.2552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4129.8053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4336, "query_norm": 1.3651, "queue_k_norm": 1.479, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3262, "sent_len_1": 66.8592, "sent_len_max_0": 127.3413, "sent_len_max_1": 188.72, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.744, "doc_norm": 1.4773, "encoder_q-embeddings": 3014.1113, "encoder_q-layer.0": 2229.7693, "encoder_q-layer.1": 2386.1948, "encoder_q-layer.10": 2448.9844, "encoder_q-layer.11": 6186.1377, "encoder_q-layer.2": 2529.4883, "encoder_q-layer.3": 2958.3103, "encoder_q-layer.4": 3128.3962, "encoder_q-layer.5": 3119.8276, "encoder_q-layer.6": 3199.8186, "encoder_q-layer.7": 2711.9094, "encoder_q-layer.8": 2649.9282, "encoder_q-layer.9": 2358.03, "epoch": 0.49, "inbatch_neg_score": 0.4387, "inbatch_pos_score": 1.0039, "learning_rate": 1.338888888888889e-05, "loss": 3.744, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.7149, "num_token_overlap": 11.6591, "num_token_query": 31.3123, "num_token_union": 65.052, "num_word_context": 202.2203, "num_word_doc": 49.7642, "num_word_query": 23.2447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4759.6954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.438, "query_norm": 1.3591, "queue_k_norm": 1.4761, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3123, "sent_len_1": 66.7149, "sent_len_max_0": 127.5337, "sent_len_max_1": 190.22, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.7425, "doc_norm": 1.4733, "encoder_q-embeddings": 2301.2883, "encoder_q-layer.0": 1500.2524, "encoder_q-layer.1": 1606.7302, "encoder_q-layer.10": 2600.3706, "encoder_q-layer.11": 6261.4175, "encoder_q-layer.2": 1841.9172, "encoder_q-layer.3": 1891.8743, "encoder_q-layer.4": 1942.8801, "encoder_q-layer.5": 1947.5537, "encoder_q-layer.6": 2049.6528, "encoder_q-layer.7": 2179.6492, "encoder_q-layer.8": 2532.8926, "encoder_q-layer.9": 2336.3049, "epoch": 0.49, "inbatch_neg_score": 0.4438, "inbatch_pos_score": 0.9805, "learning_rate": 1.3333333333333333e-05, "loss": 3.7425, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 11.7187, "num_token_query": 31.551, "num_token_union": 65.2174, "num_word_context": 202.5308, "num_word_doc": 49.8697, "num_word_query": 23.4498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4108.389, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4431, "query_norm": 1.3603, "queue_k_norm": 1.4772, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.551, "sent_len_1": 66.8042, "sent_len_max_0": 127.4575, "sent_len_max_1": 189.27, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7347, "doc_norm": 1.4787, "encoder_q-embeddings": 2303.4021, "encoder_q-layer.0": 1599.4528, "encoder_q-layer.1": 1758.2328, "encoder_q-layer.10": 2566.0847, "encoder_q-layer.11": 6685.1294, "encoder_q-layer.2": 1892.4254, "encoder_q-layer.3": 1964.5748, "encoder_q-layer.4": 1987.1426, "encoder_q-layer.5": 1961.8203, "encoder_q-layer.6": 2079.5269, "encoder_q-layer.7": 2236.9978, "encoder_q-layer.8": 2591.0596, "encoder_q-layer.9": 2357.0789, "epoch": 0.5, "inbatch_neg_score": 0.4476, "inbatch_pos_score": 0.999, "learning_rate": 1.3277777777777777e-05, "loss": 3.7347, "norm_diff": 0.1142, "norm_loss": 0.0, "num_token_doc": 66.6005, "num_token_overlap": 11.647, "num_token_query": 31.3527, "num_token_union": 65.0484, "num_word_context": 202.3129, "num_word_doc": 49.6608, "num_word_query": 23.2864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4342.1448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4463, "query_norm": 1.3645, "queue_k_norm": 1.4787, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3527, "sent_len_1": 66.6005, "sent_len_max_0": 127.6937, "sent_len_max_1": 188.5938, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7583, "doc_norm": 1.4708, "encoder_q-embeddings": 4071.4082, "encoder_q-layer.0": 2977.2952, "encoder_q-layer.1": 3441.9514, "encoder_q-layer.10": 2813.2571, "encoder_q-layer.11": 6815.9082, "encoder_q-layer.2": 3830.1643, "encoder_q-layer.3": 4324.4546, "encoder_q-layer.4": 4543.626, "encoder_q-layer.5": 4418.9985, "encoder_q-layer.6": 4073.7881, "encoder_q-layer.7": 3382.8325, "encoder_q-layer.8": 3358.2454, "encoder_q-layer.9": 2801.4587, "epoch": 0.5, "inbatch_neg_score": 0.448, "inbatch_pos_score": 1.0088, "learning_rate": 1.3222222222222221e-05, "loss": 3.7583, "norm_diff": 0.0923, "norm_loss": 0.0, "num_token_doc": 66.8141, "num_token_overlap": 11.6594, "num_token_query": 31.3842, "num_token_union": 65.1197, "num_word_context": 202.4746, "num_word_doc": 49.8013, "num_word_query": 23.2874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5986.4971, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4478, "query_norm": 1.3786, "queue_k_norm": 1.4792, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3842, "sent_len_1": 66.8141, "sent_len_max_0": 127.64, "sent_len_max_1": 192.8425, "stdk": 0.0476, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7505, "doc_norm": 1.4711, "encoder_q-embeddings": 2891.7021, "encoder_q-layer.0": 1818.236, "encoder_q-layer.1": 1969.8865, "encoder_q-layer.10": 2490.9348, "encoder_q-layer.11": 6314.9292, "encoder_q-layer.2": 2365.147, "encoder_q-layer.3": 2699.0037, "encoder_q-layer.4": 2789.9702, "encoder_q-layer.5": 3049.8032, "encoder_q-layer.6": 3011.0698, "encoder_q-layer.7": 3195.4248, "encoder_q-layer.8": 2864.843, "encoder_q-layer.9": 2364.981, "epoch": 0.5, "inbatch_neg_score": 0.4482, "inbatch_pos_score": 1.0137, "learning_rate": 1.3166666666666665e-05, "loss": 3.7505, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.786, "num_token_overlap": 11.6511, "num_token_query": 31.3546, "num_token_union": 65.1118, "num_word_context": 202.2405, "num_word_doc": 49.8484, "num_word_query": 23.276, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4728.308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.448, "query_norm": 1.3639, "queue_k_norm": 1.4796, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3546, "sent_len_1": 66.786, "sent_len_max_0": 127.5375, "sent_len_max_1": 189.27, "stdk": 0.0476, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7308, "doc_norm": 1.474, "encoder_q-embeddings": 2802.792, "encoder_q-layer.0": 1895.4247, "encoder_q-layer.1": 2041.269, "encoder_q-layer.10": 2442.2212, "encoder_q-layer.11": 6167.3252, "encoder_q-layer.2": 2301.7031, "encoder_q-layer.3": 2244.9543, "encoder_q-layer.4": 2348.9202, "encoder_q-layer.5": 2399.3618, "encoder_q-layer.6": 2447.564, "encoder_q-layer.7": 2441.3127, "encoder_q-layer.8": 2606.3442, "encoder_q-layer.9": 2357.4153, "epoch": 0.5, "inbatch_neg_score": 0.4506, "inbatch_pos_score": 1.0088, "learning_rate": 1.3111111111111113e-05, "loss": 3.7308, "norm_diff": 0.1148, "norm_loss": 0.0, "num_token_doc": 66.7916, "num_token_overlap": 11.7021, "num_token_query": 31.4674, "num_token_union": 65.1904, "num_word_context": 202.6536, "num_word_doc": 49.8536, "num_word_query": 23.4, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4392.7907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4502, "query_norm": 1.3592, "queue_k_norm": 1.4804, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4674, "sent_len_1": 66.7916, "sent_len_max_0": 127.5913, "sent_len_max_1": 189.605, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7222, "doc_norm": 1.4814, "encoder_q-embeddings": 2375.9116, "encoder_q-layer.0": 1574.7885, "encoder_q-layer.1": 1711.2927, "encoder_q-layer.10": 2638.8521, "encoder_q-layer.11": 6463.2793, "encoder_q-layer.2": 2034.2794, "encoder_q-layer.3": 1969.1398, "encoder_q-layer.4": 1883.8641, "encoder_q-layer.5": 1804.8859, "encoder_q-layer.6": 1946.2017, "encoder_q-layer.7": 2205.5864, "encoder_q-layer.8": 2583.6545, "encoder_q-layer.9": 2368.6135, "epoch": 0.5, "inbatch_neg_score": 0.4498, "inbatch_pos_score": 1.0244, "learning_rate": 1.3055555555555557e-05, "loss": 3.7222, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.7027, "num_token_overlap": 11.7091, "num_token_query": 31.381, "num_token_union": 65.0524, "num_word_context": 202.0097, "num_word_doc": 49.7205, "num_word_query": 23.2866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4218.7565, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4509, "query_norm": 1.3665, "queue_k_norm": 1.4822, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.381, "sent_len_1": 66.7027, "sent_len_max_0": 127.425, "sent_len_max_1": 189.0087, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7423, "doc_norm": 1.4736, "encoder_q-embeddings": 2382.678, "encoder_q-layer.0": 1631.9717, "encoder_q-layer.1": 1709.5847, "encoder_q-layer.10": 2666.5122, "encoder_q-layer.11": 6297.6006, "encoder_q-layer.2": 1952.4154, "encoder_q-layer.3": 1927.2529, "encoder_q-layer.4": 2042.7207, "encoder_q-layer.5": 2176.3186, "encoder_q-layer.6": 2294.5066, "encoder_q-layer.7": 2535.8501, "encoder_q-layer.8": 2759.6677, "encoder_q-layer.9": 2482.6687, "epoch": 0.5, "inbatch_neg_score": 0.4532, "inbatch_pos_score": 1.002, "learning_rate": 1.3000000000000001e-05, "loss": 3.7423, "norm_diff": 0.1078, "norm_loss": 0.0, "num_token_doc": 66.7461, "num_token_overlap": 11.6985, "num_token_query": 31.3905, "num_token_union": 65.0707, "num_word_context": 202.2538, "num_word_doc": 49.7765, "num_word_query": 23.313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4227.426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4524, "query_norm": 1.3658, "queue_k_norm": 1.4812, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3905, "sent_len_1": 66.7461, "sent_len_max_0": 127.52, "sent_len_max_1": 189.3625, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.752, "doc_norm": 1.4844, "encoder_q-embeddings": 2379.4526, "encoder_q-layer.0": 1574.246, "encoder_q-layer.1": 1687.7476, "encoder_q-layer.10": 2748.2734, "encoder_q-layer.11": 6409.3618, "encoder_q-layer.2": 1817.828, "encoder_q-layer.3": 1945.4458, "encoder_q-layer.4": 1990.2322, "encoder_q-layer.5": 1913.1904, "encoder_q-layer.6": 1967.6268, "encoder_q-layer.7": 2174.9294, "encoder_q-layer.8": 2853.6174, "encoder_q-layer.9": 2498.5161, "epoch": 0.5, "inbatch_neg_score": 0.4564, "inbatch_pos_score": 1.0293, "learning_rate": 1.2944444444444445e-05, "loss": 3.752, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.856, "num_token_overlap": 11.6794, "num_token_query": 31.3222, "num_token_union": 65.1441, "num_word_context": 202.3231, "num_word_doc": 49.8965, "num_word_query": 23.2725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4205.0454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4556, "query_norm": 1.3708, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3222, "sent_len_1": 66.856, "sent_len_max_0": 127.5162, "sent_len_max_1": 188.6637, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7536, "doc_norm": 1.4834, "encoder_q-embeddings": 4936.1836, "encoder_q-layer.0": 3631.885, "encoder_q-layer.1": 4142.1357, "encoder_q-layer.10": 2546.9949, "encoder_q-layer.11": 6592.3311, "encoder_q-layer.2": 4813.3252, "encoder_q-layer.3": 5044.6094, "encoder_q-layer.4": 4779.4136, "encoder_q-layer.5": 5027.1602, "encoder_q-layer.6": 4803.4595, "encoder_q-layer.7": 4416.0425, "encoder_q-layer.8": 3457.4043, "encoder_q-layer.9": 2630.2747, "epoch": 0.5, "inbatch_neg_score": 0.4557, "inbatch_pos_score": 1.0176, "learning_rate": 1.2888888888888889e-05, "loss": 3.7536, "norm_diff": 0.1139, "norm_loss": 0.0, "num_token_doc": 66.5418, "num_token_overlap": 11.5765, "num_token_query": 31.1899, "num_token_union": 64.9508, "num_word_context": 202.1151, "num_word_doc": 49.6621, "num_word_query": 23.1416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6755.4837, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4556, "query_norm": 1.3695, "queue_k_norm": 1.4823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1899, "sent_len_1": 66.5418, "sent_len_max_0": 127.4862, "sent_len_max_1": 190.68, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7089, "doc_norm": 1.4817, "encoder_q-embeddings": 2019.8326, "encoder_q-layer.0": 1383.9738, "encoder_q-layer.1": 1416.9211, "encoder_q-layer.10": 2883.3044, "encoder_q-layer.11": 6628.251, "encoder_q-layer.2": 1591.1945, "encoder_q-layer.3": 1666.432, "encoder_q-layer.4": 1746.5012, "encoder_q-layer.5": 1787.3121, "encoder_q-layer.6": 1975.6564, "encoder_q-layer.7": 2127.5317, "encoder_q-layer.8": 2425.1133, "encoder_q-layer.9": 2366.8447, "epoch": 0.5, "inbatch_neg_score": 0.4531, "inbatch_pos_score": 1.0293, "learning_rate": 1.2833333333333333e-05, "loss": 3.7089, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.8209, "num_token_overlap": 11.6854, "num_token_query": 31.3945, "num_token_union": 65.1763, "num_word_context": 202.2229, "num_word_doc": 49.8527, "num_word_query": 23.3273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4063.2203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4543, "query_norm": 1.3636, "queue_k_norm": 1.4827, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3945, "sent_len_1": 66.8209, "sent_len_max_0": 127.575, "sent_len_max_1": 190.3512, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.7452, "doc_norm": 1.4834, "encoder_q-embeddings": 2690.7129, "encoder_q-layer.0": 1811.8577, "encoder_q-layer.1": 2015.2742, "encoder_q-layer.10": 2709.0847, "encoder_q-layer.11": 6467.2065, "encoder_q-layer.2": 2334.7534, "encoder_q-layer.3": 2482.1382, "encoder_q-layer.4": 2649.2922, "encoder_q-layer.5": 2634.1355, "encoder_q-layer.6": 2539.9197, "encoder_q-layer.7": 2472.1045, "encoder_q-layer.8": 2810.5657, "encoder_q-layer.9": 2426.7043, "epoch": 0.5, "inbatch_neg_score": 0.4571, "inbatch_pos_score": 0.998, "learning_rate": 1.2777777777777777e-05, "loss": 3.7452, "norm_diff": 0.1259, "norm_loss": 0.0, "num_token_doc": 66.7494, "num_token_overlap": 11.6565, "num_token_query": 31.3429, "num_token_union": 65.1012, "num_word_context": 202.1661, "num_word_doc": 49.813, "num_word_query": 23.2834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4612.9889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.457, "query_norm": 1.3575, "queue_k_norm": 1.483, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3429, "sent_len_1": 66.7494, "sent_len_max_0": 127.4838, "sent_len_max_1": 190.3063, "stdk": 0.0479, "stdq": 0.0419, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7324, "doc_norm": 1.4791, "encoder_q-embeddings": 2761.3955, "encoder_q-layer.0": 1882.9801, "encoder_q-layer.1": 2111.3801, "encoder_q-layer.10": 2535.5073, "encoder_q-layer.11": 6121.3428, "encoder_q-layer.2": 2303.0996, "encoder_q-layer.3": 2453.2205, "encoder_q-layer.4": 2621.2974, "encoder_q-layer.5": 2600.4648, "encoder_q-layer.6": 2422.02, "encoder_q-layer.7": 2370.8179, "encoder_q-layer.8": 2679.2156, "encoder_q-layer.9": 2385.0586, "epoch": 0.5, "inbatch_neg_score": 0.4568, "inbatch_pos_score": 1.041, "learning_rate": 1.2722222222222221e-05, "loss": 3.7324, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8185, "num_token_overlap": 11.7306, "num_token_query": 31.5085, "num_token_union": 65.1765, "num_word_context": 202.5437, "num_word_doc": 49.8421, "num_word_query": 23.4182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4420.1438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4565, "query_norm": 1.3866, "queue_k_norm": 1.4845, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5085, "sent_len_1": 66.8185, "sent_len_max_0": 127.5613, "sent_len_max_1": 189.6825, "stdk": 0.0477, "stdq": 0.0432, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.7248, "doc_norm": 1.4905, "encoder_q-embeddings": 2352.1799, "encoder_q-layer.0": 1649.6741, "encoder_q-layer.1": 1803.3875, "encoder_q-layer.10": 2375.0425, "encoder_q-layer.11": 6145.4956, "encoder_q-layer.2": 2030.6479, "encoder_q-layer.3": 2104.8889, "encoder_q-layer.4": 2241.2654, "encoder_q-layer.5": 2129.4995, "encoder_q-layer.6": 2146.5652, "encoder_q-layer.7": 2278.8745, "encoder_q-layer.8": 2430.5708, "encoder_q-layer.9": 2337.9771, "epoch": 0.5, "inbatch_neg_score": 0.4505, "inbatch_pos_score": 1.0312, "learning_rate": 1.2666666666666668e-05, "loss": 3.7248, "norm_diff": 0.11, "norm_loss": 0.0, "num_token_doc": 66.8783, "num_token_overlap": 11.6775, "num_token_query": 31.4458, "num_token_union": 65.2091, "num_word_context": 202.406, "num_word_doc": 49.877, "num_word_query": 23.3566, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4205.7315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4512, "query_norm": 1.3805, "queue_k_norm": 1.4854, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4458, "sent_len_1": 66.8783, "sent_len_max_0": 127.36, "sent_len_max_1": 189.8275, "stdk": 0.0482, "stdq": 0.0431, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7465, "doc_norm": 1.4852, "encoder_q-embeddings": 3605.9224, "encoder_q-layer.0": 2446.9543, "encoder_q-layer.1": 2598.4602, "encoder_q-layer.10": 2615.5742, "encoder_q-layer.11": 6269.6743, "encoder_q-layer.2": 2814.6025, "encoder_q-layer.3": 2947.885, "encoder_q-layer.4": 2760.6357, "encoder_q-layer.5": 2704.3215, "encoder_q-layer.6": 2783.0083, "encoder_q-layer.7": 2881.8943, "encoder_q-layer.8": 2847.3411, "encoder_q-layer.9": 2328.9702, "epoch": 0.5, "inbatch_neg_score": 0.4551, "inbatch_pos_score": 1.0225, "learning_rate": 1.2611111111111113e-05, "loss": 3.7465, "norm_diff": 0.1095, "norm_loss": 0.0, "num_token_doc": 66.6448, "num_token_overlap": 11.6611, "num_token_query": 31.3115, "num_token_union": 64.9766, "num_word_context": 202.1225, "num_word_doc": 49.7282, "num_word_query": 23.2597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4914.9289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4546, "query_norm": 1.3757, "queue_k_norm": 1.4842, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3115, "sent_len_1": 66.6448, "sent_len_max_0": 127.4262, "sent_len_max_1": 189.2663, "stdk": 0.0479, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7494, "doc_norm": 1.4832, "encoder_q-embeddings": 3171.1729, "encoder_q-layer.0": 2190.3657, "encoder_q-layer.1": 2286.3586, "encoder_q-layer.10": 2523.1665, "encoder_q-layer.11": 6261.3125, "encoder_q-layer.2": 2646.5349, "encoder_q-layer.3": 2802.2598, "encoder_q-layer.4": 2721.8533, "encoder_q-layer.5": 2750.5928, "encoder_q-layer.6": 2694.7014, "encoder_q-layer.7": 2872.5005, "encoder_q-layer.8": 2671.1592, "encoder_q-layer.9": 2342.5522, "epoch": 0.5, "inbatch_neg_score": 0.454, "inbatch_pos_score": 1.0283, "learning_rate": 1.2555555555555557e-05, "loss": 3.7494, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.6259, "num_token_overlap": 11.6449, "num_token_query": 31.3641, "num_token_union": 65.0558, "num_word_context": 202.1289, "num_word_doc": 49.7065, "num_word_query": 23.2863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4791.4822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4541, "query_norm": 1.3721, "queue_k_norm": 1.486, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3641, "sent_len_1": 66.6259, "sent_len_max_0": 127.6375, "sent_len_max_1": 189.9988, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7263, "doc_norm": 1.4852, "encoder_q-embeddings": 2405.3042, "encoder_q-layer.0": 1664.6869, "encoder_q-layer.1": 1776.5975, "encoder_q-layer.10": 2489.7222, "encoder_q-layer.11": 6257.8838, "encoder_q-layer.2": 1957.3345, "encoder_q-layer.3": 2032.6357, "encoder_q-layer.4": 2132.6697, "encoder_q-layer.5": 2175.4099, "encoder_q-layer.6": 2373.1958, "encoder_q-layer.7": 2453.5129, "encoder_q-layer.8": 2743.4011, "encoder_q-layer.9": 2439.5784, "epoch": 0.5, "inbatch_neg_score": 0.4569, "inbatch_pos_score": 1.0264, "learning_rate": 1.25e-05, "loss": 3.7263, "norm_diff": 0.1158, "norm_loss": 0.0, "num_token_doc": 66.658, "num_token_overlap": 11.7223, "num_token_query": 31.4909, "num_token_union": 65.0926, "num_word_context": 202.0441, "num_word_doc": 49.7164, "num_word_query": 23.4054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4250.0101, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4546, "query_norm": 1.3694, "queue_k_norm": 1.4877, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4909, "sent_len_1": 66.658, "sent_len_max_0": 127.4237, "sent_len_max_1": 189.6975, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7381, "doc_norm": 1.4881, "encoder_q-embeddings": 3049.928, "encoder_q-layer.0": 2001.1938, "encoder_q-layer.1": 2283.9163, "encoder_q-layer.10": 2795.1953, "encoder_q-layer.11": 6627.5508, "encoder_q-layer.2": 2479.885, "encoder_q-layer.3": 2569.7454, "encoder_q-layer.4": 2611.2361, "encoder_q-layer.5": 2799.3428, "encoder_q-layer.6": 2993.7029, "encoder_q-layer.7": 2879.7864, "encoder_q-layer.8": 2982.2078, "encoder_q-layer.9": 2400.6211, "epoch": 0.51, "inbatch_neg_score": 0.4556, "inbatch_pos_score": 1.0127, "learning_rate": 1.2444444444444445e-05, "loss": 3.7381, "norm_diff": 0.1188, "norm_loss": 0.0, "num_token_doc": 66.5257, "num_token_overlap": 11.6456, "num_token_query": 31.3422, "num_token_union": 64.9627, "num_word_context": 201.6279, "num_word_doc": 49.6545, "num_word_query": 23.2567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4828.3573, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4543, "query_norm": 1.3694, "queue_k_norm": 1.4855, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3422, "sent_len_1": 66.5257, "sent_len_max_0": 127.6012, "sent_len_max_1": 188.7225, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7345, "doc_norm": 1.495, "encoder_q-embeddings": 2226.0393, "encoder_q-layer.0": 1543.7041, "encoder_q-layer.1": 1683.3909, "encoder_q-layer.10": 2436.812, "encoder_q-layer.11": 6237.8945, "encoder_q-layer.2": 1934.9951, "encoder_q-layer.3": 1908.1589, "encoder_q-layer.4": 2077.6128, "encoder_q-layer.5": 2169.2974, "encoder_q-layer.6": 2326.8147, "encoder_q-layer.7": 2438.416, "encoder_q-layer.8": 2503.9485, "encoder_q-layer.9": 2347.3149, "epoch": 0.51, "inbatch_neg_score": 0.4518, "inbatch_pos_score": 1.04, "learning_rate": 1.238888888888889e-05, "loss": 3.7345, "norm_diff": 0.1288, "norm_loss": 0.0, "num_token_doc": 66.8134, "num_token_overlap": 11.6866, "num_token_query": 31.4252, "num_token_union": 65.1355, "num_word_context": 202.2014, "num_word_doc": 49.8712, "num_word_query": 23.3519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4176.1447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4529, "query_norm": 1.3663, "queue_k_norm": 1.488, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4252, "sent_len_1": 66.8134, "sent_len_max_0": 127.1975, "sent_len_max_1": 190.535, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7516, "doc_norm": 1.4824, "encoder_q-embeddings": 2354.3352, "encoder_q-layer.0": 1566.9341, "encoder_q-layer.1": 1668.7979, "encoder_q-layer.10": 2714.3276, "encoder_q-layer.11": 6082.7466, "encoder_q-layer.2": 1857.0878, "encoder_q-layer.3": 2005.9252, "encoder_q-layer.4": 2117.1282, "encoder_q-layer.5": 2109.7432, "encoder_q-layer.6": 2278.5688, "encoder_q-layer.7": 2436.4685, "encoder_q-layer.8": 2530.408, "encoder_q-layer.9": 2284.4158, "epoch": 0.51, "inbatch_neg_score": 0.4552, "inbatch_pos_score": 1.0293, "learning_rate": 1.2333333333333334e-05, "loss": 3.7516, "norm_diff": 0.1121, "norm_loss": 0.0, "num_token_doc": 66.8433, "num_token_overlap": 11.6337, "num_token_query": 31.3533, "num_token_union": 65.2151, "num_word_context": 202.5156, "num_word_doc": 49.8491, "num_word_query": 23.2766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4128.3983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4548, "query_norm": 1.3703, "queue_k_norm": 1.4876, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3533, "sent_len_1": 66.8433, "sent_len_max_0": 127.56, "sent_len_max_1": 191.2463, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7587, "doc_norm": 1.4875, "encoder_q-embeddings": 2598.3943, "encoder_q-layer.0": 1700.3145, "encoder_q-layer.1": 1827.9363, "encoder_q-layer.10": 2707.1777, "encoder_q-layer.11": 6784.4072, "encoder_q-layer.2": 1984.9648, "encoder_q-layer.3": 2005.6052, "encoder_q-layer.4": 1966.6841, "encoder_q-layer.5": 1941.5876, "encoder_q-layer.6": 2148.7925, "encoder_q-layer.7": 2285.6543, "encoder_q-layer.8": 2630.677, "encoder_q-layer.9": 2597.1328, "epoch": 0.51, "inbatch_neg_score": 0.4547, "inbatch_pos_score": 1.0176, "learning_rate": 1.2277777777777778e-05, "loss": 3.7587, "norm_diff": 0.1062, "norm_loss": 0.0, "num_token_doc": 66.5375, "num_token_overlap": 11.66, "num_token_query": 31.381, "num_token_union": 64.9729, "num_word_context": 201.9846, "num_word_doc": 49.6189, "num_word_query": 23.302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4370.9469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4551, "query_norm": 1.3813, "queue_k_norm": 1.4866, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.381, "sent_len_1": 66.5375, "sent_len_max_0": 127.5012, "sent_len_max_1": 190.145, "stdk": 0.048, "stdq": 0.043, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7324, "doc_norm": 1.4856, "encoder_q-embeddings": 2510.7388, "encoder_q-layer.0": 1738.973, "encoder_q-layer.1": 1832.3619, "encoder_q-layer.10": 2880.7559, "encoder_q-layer.11": 6391.6826, "encoder_q-layer.2": 2072.8425, "encoder_q-layer.3": 2094.3372, "encoder_q-layer.4": 2148.7007, "encoder_q-layer.5": 2189.7202, "encoder_q-layer.6": 2289.6636, "encoder_q-layer.7": 2480.262, "encoder_q-layer.8": 2690.374, "encoder_q-layer.9": 2506.2532, "epoch": 0.51, "inbatch_neg_score": 0.4562, "inbatch_pos_score": 1.0293, "learning_rate": 1.2222222222222222e-05, "loss": 3.7324, "norm_diff": 0.11, "norm_loss": 0.0, "num_token_doc": 66.723, "num_token_overlap": 11.6632, "num_token_query": 31.2564, "num_token_union": 65.007, "num_word_context": 202.2623, "num_word_doc": 49.7785, "num_word_query": 23.208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4352.9511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4563, "query_norm": 1.3756, "queue_k_norm": 1.4874, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2564, "sent_len_1": 66.723, "sent_len_max_0": 127.3662, "sent_len_max_1": 190.615, "stdk": 0.0479, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7441, "doc_norm": 1.4885, "encoder_q-embeddings": 2608.4392, "encoder_q-layer.0": 1814.4517, "encoder_q-layer.1": 1899.3484, "encoder_q-layer.10": 2684.8118, "encoder_q-layer.11": 6187.3071, "encoder_q-layer.2": 2238.1365, "encoder_q-layer.3": 2325.9172, "encoder_q-layer.4": 2412.3462, "encoder_q-layer.5": 2573.4802, "encoder_q-layer.6": 2449.9575, "encoder_q-layer.7": 2773.616, "encoder_q-layer.8": 2628.7966, "encoder_q-layer.9": 2387.7046, "epoch": 0.51, "inbatch_neg_score": 0.4573, "inbatch_pos_score": 1.0264, "learning_rate": 1.2166666666666668e-05, "loss": 3.7441, "norm_diff": 0.1178, "norm_loss": 0.0, "num_token_doc": 66.5247, "num_token_overlap": 11.641, "num_token_query": 31.3693, "num_token_union": 64.9586, "num_word_context": 201.9937, "num_word_doc": 49.6452, "num_word_query": 23.3122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4365.5045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.457, "query_norm": 1.3707, "queue_k_norm": 1.4855, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3693, "sent_len_1": 66.5247, "sent_len_max_0": 127.4163, "sent_len_max_1": 189.3413, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.7523, "doc_norm": 1.4845, "encoder_q-embeddings": 6225.4634, "encoder_q-layer.0": 4293.1787, "encoder_q-layer.1": 4453.5054, "encoder_q-layer.10": 2750.7073, "encoder_q-layer.11": 6406.4575, "encoder_q-layer.2": 5299.3018, "encoder_q-layer.3": 5805.9243, "encoder_q-layer.4": 6404.6484, "encoder_q-layer.5": 6684.6382, "encoder_q-layer.6": 6136.4443, "encoder_q-layer.7": 4588.8848, "encoder_q-layer.8": 3574.9556, "encoder_q-layer.9": 2563.678, "epoch": 0.51, "inbatch_neg_score": 0.4567, "inbatch_pos_score": 1.0068, "learning_rate": 1.2111111111111112e-05, "loss": 3.7523, "norm_diff": 0.1143, "norm_loss": 0.0, "num_token_doc": 66.8947, "num_token_overlap": 11.6298, "num_token_query": 31.126, "num_token_union": 65.0349, "num_word_context": 202.1828, "num_word_doc": 49.9119, "num_word_query": 23.1037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7822.2176, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4563, "query_norm": 1.3702, "queue_k_norm": 1.4874, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.126, "sent_len_1": 66.8947, "sent_len_max_0": 127.6525, "sent_len_max_1": 190.1475, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.722, "doc_norm": 1.4905, "encoder_q-embeddings": 2308.6775, "encoder_q-layer.0": 1547.597, "encoder_q-layer.1": 1671.5007, "encoder_q-layer.10": 2440.5137, "encoder_q-layer.11": 6041.8965, "encoder_q-layer.2": 1852.729, "encoder_q-layer.3": 1921.7062, "encoder_q-layer.4": 1977.7521, "encoder_q-layer.5": 2061.7183, "encoder_q-layer.6": 2215.0493, "encoder_q-layer.7": 2189.167, "encoder_q-layer.8": 2396.2903, "encoder_q-layer.9": 2207.73, "epoch": 0.51, "inbatch_neg_score": 0.4548, "inbatch_pos_score": 1.04, "learning_rate": 1.2055555555555556e-05, "loss": 3.722, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 11.6949, "num_token_query": 31.4763, "num_token_union": 65.1268, "num_word_context": 202.4206, "num_word_doc": 49.8244, "num_word_query": 23.391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4010.0558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4548, "query_norm": 1.3697, "queue_k_norm": 1.488, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4763, "sent_len_1": 66.7256, "sent_len_max_0": 127.51, "sent_len_max_1": 189.5538, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.721, "doc_norm": 1.4861, "encoder_q-embeddings": 6081.147, "encoder_q-layer.0": 4610.9385, "encoder_q-layer.1": 4883.5576, "encoder_q-layer.10": 2536.437, "encoder_q-layer.11": 5937.4321, "encoder_q-layer.2": 6223.3188, "encoder_q-layer.3": 6047.6289, "encoder_q-layer.4": 6229.7158, "encoder_q-layer.5": 6496.4854, "encoder_q-layer.6": 4735.1606, "encoder_q-layer.7": 3839.3049, "encoder_q-layer.8": 3172.0488, "encoder_q-layer.9": 2354.4365, "epoch": 0.51, "inbatch_neg_score": 0.4573, "inbatch_pos_score": 1.0176, "learning_rate": 1.2e-05, "loss": 3.721, "norm_diff": 0.1111, "norm_loss": 0.0, "num_token_doc": 66.7, "num_token_overlap": 11.6944, "num_token_query": 31.4575, "num_token_union": 65.0972, "num_word_context": 201.8932, "num_word_doc": 49.7157, "num_word_query": 23.3916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7610.2141, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4575, "query_norm": 1.375, "queue_k_norm": 1.4873, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4575, "sent_len_1": 66.7, "sent_len_max_0": 127.5537, "sent_len_max_1": 190.1875, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7246, "doc_norm": 1.4947, "encoder_q-embeddings": 2604.0127, "encoder_q-layer.0": 1721.1656, "encoder_q-layer.1": 1943.3752, "encoder_q-layer.10": 2415.9893, "encoder_q-layer.11": 6048.9014, "encoder_q-layer.2": 2225.5522, "encoder_q-layer.3": 2387.5547, "encoder_q-layer.4": 2551.9241, "encoder_q-layer.5": 2562.3301, "encoder_q-layer.6": 2334.2781, "encoder_q-layer.7": 2341.0415, "encoder_q-layer.8": 2609.7791, "encoder_q-layer.9": 2383.3447, "epoch": 0.51, "inbatch_neg_score": 0.4564, "inbatch_pos_score": 1.0322, "learning_rate": 1.1944444444444446e-05, "loss": 3.7246, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.9393, "num_token_overlap": 11.6742, "num_token_query": 31.4369, "num_token_union": 65.2835, "num_word_context": 202.1179, "num_word_doc": 49.9141, "num_word_query": 23.3681, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4354.3742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4563, "query_norm": 1.3739, "queue_k_norm": 1.4869, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4369, "sent_len_1": 66.9393, "sent_len_max_0": 127.58, "sent_len_max_1": 190.0788, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7431, "doc_norm": 1.4948, "encoder_q-embeddings": 3641.4248, "encoder_q-layer.0": 2541.5598, "encoder_q-layer.1": 2872.3706, "encoder_q-layer.10": 2622.4016, "encoder_q-layer.11": 6232.7417, "encoder_q-layer.2": 3521.6426, "encoder_q-layer.3": 3620.3386, "encoder_q-layer.4": 3421.2673, "encoder_q-layer.5": 2984.3569, "encoder_q-layer.6": 2606.6807, "encoder_q-layer.7": 2548.7146, "encoder_q-layer.8": 2607.2161, "encoder_q-layer.9": 2362.1731, "epoch": 0.51, "inbatch_neg_score": 0.4575, "inbatch_pos_score": 1.04, "learning_rate": 1.188888888888889e-05, "loss": 3.7431, "norm_diff": 0.1112, "norm_loss": 0.0, "num_token_doc": 66.8485, "num_token_overlap": 11.6498, "num_token_query": 31.4346, "num_token_union": 65.2301, "num_word_context": 202.4598, "num_word_doc": 49.8898, "num_word_query": 23.3515, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5115.6137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4561, "query_norm": 1.3836, "queue_k_norm": 1.489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4346, "sent_len_1": 66.8485, "sent_len_max_0": 127.3925, "sent_len_max_1": 188.5938, "stdk": 0.0482, "stdq": 0.043, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7376, "doc_norm": 1.4828, "encoder_q-embeddings": 2084.165, "encoder_q-layer.0": 1379.7679, "encoder_q-layer.1": 1477.5065, "encoder_q-layer.10": 2483.4966, "encoder_q-layer.11": 6097.4126, "encoder_q-layer.2": 1726.3934, "encoder_q-layer.3": 1805.2623, "encoder_q-layer.4": 1896.6818, "encoder_q-layer.5": 1928.2712, "encoder_q-layer.6": 1975.8264, "encoder_q-layer.7": 2191.1519, "encoder_q-layer.8": 2413.9243, "encoder_q-layer.9": 2281.1714, "epoch": 0.51, "inbatch_neg_score": 0.4578, "inbatch_pos_score": 1.0303, "learning_rate": 1.1833333333333334e-05, "loss": 3.7376, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 67.0339, "num_token_overlap": 11.6862, "num_token_query": 31.4091, "num_token_union": 65.31, "num_word_context": 202.6272, "num_word_doc": 49.9877, "num_word_query": 23.3519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3954.402, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.458, "query_norm": 1.3752, "queue_k_norm": 1.4872, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4091, "sent_len_1": 67.0339, "sent_len_max_0": 127.6663, "sent_len_max_1": 189.8125, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7329, "doc_norm": 1.4899, "encoder_q-embeddings": 2023.085, "encoder_q-layer.0": 1362.6487, "encoder_q-layer.1": 1420.6547, "encoder_q-layer.10": 2645.7949, "encoder_q-layer.11": 6365.2173, "encoder_q-layer.2": 1589.7153, "encoder_q-layer.3": 1605.8378, "encoder_q-layer.4": 1728.9246, "encoder_q-layer.5": 1626.7468, "encoder_q-layer.6": 1859.8831, "encoder_q-layer.7": 2094.4417, "encoder_q-layer.8": 2522.6165, "encoder_q-layer.9": 2330.3677, "epoch": 0.51, "inbatch_neg_score": 0.4582, "inbatch_pos_score": 1.0215, "learning_rate": 1.1777777777777778e-05, "loss": 3.7329, "norm_diff": 0.1215, "norm_loss": 0.0, "num_token_doc": 66.7212, "num_token_overlap": 11.6589, "num_token_query": 31.4446, "num_token_union": 65.1145, "num_word_context": 202.2997, "num_word_doc": 49.796, "num_word_query": 23.3553, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3921.0077, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.458, "query_norm": 1.3684, "queue_k_norm": 1.4889, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4446, "sent_len_1": 66.7212, "sent_len_max_0": 127.425, "sent_len_max_1": 190.0737, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7242, "doc_norm": 1.4954, "encoder_q-embeddings": 3049.3599, "encoder_q-layer.0": 2234.2803, "encoder_q-layer.1": 2388.8867, "encoder_q-layer.10": 2570.3386, "encoder_q-layer.11": 6169.5132, "encoder_q-layer.2": 2624.8984, "encoder_q-layer.3": 2622.4192, "encoder_q-layer.4": 2765.6946, "encoder_q-layer.5": 2582.6277, "encoder_q-layer.6": 2596.4802, "encoder_q-layer.7": 2466.3081, "encoder_q-layer.8": 2694.6377, "encoder_q-layer.9": 2417.2546, "epoch": 0.51, "inbatch_neg_score": 0.4578, "inbatch_pos_score": 1.0215, "learning_rate": 1.1722222222222224e-05, "loss": 3.7242, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 67.1069, "num_token_overlap": 11.7178, "num_token_query": 31.4312, "num_token_union": 65.3045, "num_word_context": 202.8239, "num_word_doc": 50.0762, "num_word_query": 23.3377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4646.6177, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.459, "query_norm": 1.3668, "queue_k_norm": 1.4904, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4312, "sent_len_1": 67.1069, "sent_len_max_0": 127.595, "sent_len_max_1": 190.96, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.7284, "doc_norm": 1.4882, "encoder_q-embeddings": 3080.938, "encoder_q-layer.0": 2093.6436, "encoder_q-layer.1": 2365.2009, "encoder_q-layer.10": 2687.7439, "encoder_q-layer.11": 6055.2144, "encoder_q-layer.2": 2632.0527, "encoder_q-layer.3": 2790.0913, "encoder_q-layer.4": 2997.6946, "encoder_q-layer.5": 2715.6162, "encoder_q-layer.6": 2612.4917, "encoder_q-layer.7": 2585.1667, "encoder_q-layer.8": 2760.0823, "encoder_q-layer.9": 2571.239, "epoch": 0.51, "inbatch_neg_score": 0.4656, "inbatch_pos_score": 1.0547, "learning_rate": 1.1666666666666668e-05, "loss": 3.7284, "norm_diff": 0.0993, "norm_loss": 0.0, "num_token_doc": 66.9126, "num_token_overlap": 11.6678, "num_token_query": 31.375, "num_token_union": 65.1935, "num_word_context": 202.1018, "num_word_doc": 49.9403, "num_word_query": 23.3153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4674.6728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4648, "query_norm": 1.3889, "queue_k_norm": 1.4888, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.375, "sent_len_1": 66.9126, "sent_len_max_0": 127.4925, "sent_len_max_1": 190.015, "stdk": 0.048, "stdq": 0.0431, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.7443, "doc_norm": 1.5009, "encoder_q-embeddings": 2283.2883, "encoder_q-layer.0": 1508.4724, "encoder_q-layer.1": 1576.0935, "encoder_q-layer.10": 2430.5986, "encoder_q-layer.11": 6205.0801, "encoder_q-layer.2": 1791.8845, "encoder_q-layer.3": 1841.4314, "encoder_q-layer.4": 1953.7709, "encoder_q-layer.5": 1991.0989, "encoder_q-layer.6": 2161.2156, "encoder_q-layer.7": 2257.5845, "encoder_q-layer.8": 2419.844, "encoder_q-layer.9": 2246.9834, "epoch": 0.51, "inbatch_neg_score": 0.4661, "inbatch_pos_score": 1.0439, "learning_rate": 1.1611111111111112e-05, "loss": 3.7443, "norm_diff": 0.1285, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 11.7176, "num_token_query": 31.4649, "num_token_union": 65.2133, "num_word_context": 202.5867, "num_word_doc": 49.8912, "num_word_query": 23.3606, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4078.6335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4656, "query_norm": 1.3724, "queue_k_norm": 1.4894, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4649, "sent_len_1": 66.8413, "sent_len_max_0": 127.4688, "sent_len_max_1": 189.2438, "stdk": 0.0484, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7295, "doc_norm": 1.493, "encoder_q-embeddings": 7618.7227, "encoder_q-layer.0": 5685.3398, "encoder_q-layer.1": 6090.0742, "encoder_q-layer.10": 2628.425, "encoder_q-layer.11": 6121.6934, "encoder_q-layer.2": 6717.7891, "encoder_q-layer.3": 6846.6431, "encoder_q-layer.4": 7184.1973, "encoder_q-layer.5": 6641.8223, "encoder_q-layer.6": 5708.9766, "encoder_q-layer.7": 4395.2192, "encoder_q-layer.8": 3894.1995, "encoder_q-layer.9": 2830.2283, "epoch": 0.52, "inbatch_neg_score": 0.4664, "inbatch_pos_score": 1.0361, "learning_rate": 1.1555555555555556e-05, "loss": 3.7295, "norm_diff": 0.1209, "norm_loss": 0.0, "num_token_doc": 66.765, "num_token_overlap": 11.6551, "num_token_query": 31.3848, "num_token_union": 65.1026, "num_word_context": 202.1579, "num_word_doc": 49.7697, "num_word_query": 23.3, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8960.2222, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4678, "query_norm": 1.3721, "queue_k_norm": 1.4913, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3848, "sent_len_1": 66.765, "sent_len_max_0": 127.4513, "sent_len_max_1": 190.7612, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7331, "doc_norm": 1.4899, "encoder_q-embeddings": 2289.5269, "encoder_q-layer.0": 1497.6077, "encoder_q-layer.1": 1555.6588, "encoder_q-layer.10": 2531.5642, "encoder_q-layer.11": 6370.3936, "encoder_q-layer.2": 1736.8875, "encoder_q-layer.3": 1844.4486, "encoder_q-layer.4": 1870.437, "encoder_q-layer.5": 1897.8622, "encoder_q-layer.6": 1975.3113, "encoder_q-layer.7": 2276.5068, "encoder_q-layer.8": 2570.0315, "encoder_q-layer.9": 2362.7742, "epoch": 0.52, "inbatch_neg_score": 0.47, "inbatch_pos_score": 1.0459, "learning_rate": 1.1500000000000002e-05, "loss": 3.7331, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.5189, "num_token_overlap": 11.6614, "num_token_query": 31.38, "num_token_union": 64.9689, "num_word_context": 201.8163, "num_word_doc": 49.6244, "num_word_query": 23.2994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4147.0258, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4688, "query_norm": 1.386, "queue_k_norm": 1.4899, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.38, "sent_len_1": 66.5189, "sent_len_max_0": 127.5438, "sent_len_max_1": 187.9762, "stdk": 0.048, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7185, "doc_norm": 1.4942, "encoder_q-embeddings": 2943.2793, "encoder_q-layer.0": 2003.0565, "encoder_q-layer.1": 2109.0925, "encoder_q-layer.10": 2597.6746, "encoder_q-layer.11": 6573.3662, "encoder_q-layer.2": 2299.6001, "encoder_q-layer.3": 2459.7734, "encoder_q-layer.4": 2556.0557, "encoder_q-layer.5": 2636.9111, "encoder_q-layer.6": 2740.8926, "encoder_q-layer.7": 2644.9973, "encoder_q-layer.8": 2886.2771, "encoder_q-layer.9": 2577.0713, "epoch": 0.52, "inbatch_neg_score": 0.4668, "inbatch_pos_score": 1.0332, "learning_rate": 1.1444444444444446e-05, "loss": 3.7185, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.8934, "num_token_overlap": 11.7382, "num_token_query": 31.5387, "num_token_union": 65.2648, "num_word_context": 202.5777, "num_word_doc": 49.9199, "num_word_query": 23.4369, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4711.9031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4675, "query_norm": 1.3741, "queue_k_norm": 1.4936, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5387, "sent_len_1": 66.8934, "sent_len_max_0": 127.3912, "sent_len_max_1": 191.4225, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7343, "doc_norm": 1.4913, "encoder_q-embeddings": 5141.2998, "encoder_q-layer.0": 3446.3657, "encoder_q-layer.1": 3845.5359, "encoder_q-layer.10": 4912.1738, "encoder_q-layer.11": 12454.7871, "encoder_q-layer.2": 4405.3813, "encoder_q-layer.3": 4596.106, "encoder_q-layer.4": 4980.1255, "encoder_q-layer.5": 4548.8931, "encoder_q-layer.6": 4515.6621, "encoder_q-layer.7": 4649.686, "encoder_q-layer.8": 5176.4565, "encoder_q-layer.9": 4420.457, "epoch": 0.52, "inbatch_neg_score": 0.4702, "inbatch_pos_score": 1.0488, "learning_rate": 1.138888888888889e-05, "loss": 3.7343, "norm_diff": 0.11, "norm_loss": 0.0, "num_token_doc": 66.7173, "num_token_overlap": 11.66, "num_token_query": 31.3248, "num_token_union": 65.0728, "num_word_context": 202.4094, "num_word_doc": 49.7699, "num_word_query": 23.2574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8684.8748, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.469, "query_norm": 1.3813, "queue_k_norm": 1.4919, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3248, "sent_len_1": 66.7173, "sent_len_max_0": 127.45, "sent_len_max_1": 190.265, "stdk": 0.048, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7179, "doc_norm": 1.4902, "encoder_q-embeddings": 7591.7075, "encoder_q-layer.0": 5162.8501, "encoder_q-layer.1": 6041.5898, "encoder_q-layer.10": 5052.6406, "encoder_q-layer.11": 13047.6475, "encoder_q-layer.2": 7627.3687, "encoder_q-layer.3": 7946.7817, "encoder_q-layer.4": 9293.4834, "encoder_q-layer.5": 9022.7393, "encoder_q-layer.6": 7109.7241, "encoder_q-layer.7": 6335.3442, "encoder_q-layer.8": 5858.2202, "encoder_q-layer.9": 4848.3105, "epoch": 0.52, "inbatch_neg_score": 0.469, "inbatch_pos_score": 1.0205, "learning_rate": 1.1333333333333334e-05, "loss": 3.7179, "norm_diff": 0.13, "norm_loss": 0.0, "num_token_doc": 66.8735, "num_token_overlap": 11.6736, "num_token_query": 31.4132, "num_token_union": 65.1564, "num_word_context": 202.6551, "num_word_doc": 49.8732, "num_word_query": 23.3319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11436.8525, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.469, "query_norm": 1.3602, "queue_k_norm": 1.4915, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4132, "sent_len_1": 66.8735, "sent_len_max_0": 127.4925, "sent_len_max_1": 191.53, "stdk": 0.048, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.7336, "doc_norm": 1.4936, "encoder_q-embeddings": 4210.6626, "encoder_q-layer.0": 2873.1628, "encoder_q-layer.1": 3142.0464, "encoder_q-layer.10": 4581.2788, "encoder_q-layer.11": 11677.5225, "encoder_q-layer.2": 3596.3582, "encoder_q-layer.3": 3723.7114, "encoder_q-layer.4": 4043.6882, "encoder_q-layer.5": 4231.6265, "encoder_q-layer.6": 4680.6758, "encoder_q-layer.7": 4790.3647, "encoder_q-layer.8": 5097.2158, "encoder_q-layer.9": 4659.9702, "epoch": 0.52, "inbatch_neg_score": 0.4663, "inbatch_pos_score": 1.0625, "learning_rate": 1.127777777777778e-05, "loss": 3.7336, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.7702, "num_token_overlap": 11.6473, "num_token_query": 31.3187, "num_token_union": 65.072, "num_word_context": 201.9261, "num_word_doc": 49.7905, "num_word_query": 23.246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7885.1521, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4658, "query_norm": 1.3712, "queue_k_norm": 1.4922, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3187, "sent_len_1": 66.7702, "sent_len_max_0": 127.6637, "sent_len_max_1": 191.2937, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.7521, "doc_norm": 1.4915, "encoder_q-embeddings": 5312.0728, "encoder_q-layer.0": 3702.7773, "encoder_q-layer.1": 3922.8306, "encoder_q-layer.10": 5018.2017, "encoder_q-layer.11": 12934.1738, "encoder_q-layer.2": 4588.3081, "encoder_q-layer.3": 4654.1616, "encoder_q-layer.4": 5153.625, "encoder_q-layer.5": 5482.4531, "encoder_q-layer.6": 5320.8867, "encoder_q-layer.7": 4967.6572, "encoder_q-layer.8": 5237.2114, "encoder_q-layer.9": 4660.3677, "epoch": 0.52, "inbatch_neg_score": 0.4642, "inbatch_pos_score": 1.0166, "learning_rate": 1.1222222222222224e-05, "loss": 3.7521, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.6924, "num_token_overlap": 11.6263, "num_token_query": 31.2524, "num_token_union": 65.0446, "num_word_context": 202.3046, "num_word_doc": 49.7646, "num_word_query": 23.2089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8950.7553, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4639, "query_norm": 1.3662, "queue_k_norm": 1.4927, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2524, "sent_len_1": 66.6924, "sent_len_max_0": 127.4375, "sent_len_max_1": 189.6825, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7391, "doc_norm": 1.4922, "encoder_q-embeddings": 4849.9917, "encoder_q-layer.0": 3233.7234, "encoder_q-layer.1": 3379.637, "encoder_q-layer.10": 4951.2354, "encoder_q-layer.11": 13489.4062, "encoder_q-layer.2": 3837.5466, "encoder_q-layer.3": 4000.1562, "encoder_q-layer.4": 4222.4517, "encoder_q-layer.5": 4352.2065, "encoder_q-layer.6": 4600.4141, "encoder_q-layer.7": 4799.1772, "encoder_q-layer.8": 5676.9785, "encoder_q-layer.9": 4840.7397, "epoch": 0.52, "inbatch_neg_score": 0.4641, "inbatch_pos_score": 1.0234, "learning_rate": 1.1166666666666668e-05, "loss": 3.7391, "norm_diff": 0.1128, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 11.6195, "num_token_query": 31.2771, "num_token_union": 65.0527, "num_word_context": 202.1885, "num_word_doc": 49.7417, "num_word_query": 23.1993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8791.7233, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4644, "query_norm": 1.3794, "queue_k_norm": 1.4912, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2771, "sent_len_1": 66.7047, "sent_len_max_0": 127.615, "sent_len_max_1": 190.8363, "stdk": 0.0481, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.751, "doc_norm": 1.4963, "encoder_q-embeddings": 2044.9636, "encoder_q-layer.0": 1353.2689, "encoder_q-layer.1": 1451.1195, "encoder_q-layer.10": 2551.7056, "encoder_q-layer.11": 6464.0889, "encoder_q-layer.2": 1635.9528, "encoder_q-layer.3": 1653.6248, "encoder_q-layer.4": 1783.3784, "encoder_q-layer.5": 1855.4259, "encoder_q-layer.6": 2016.2001, "encoder_q-layer.7": 2222.1533, "encoder_q-layer.8": 2595.2751, "encoder_q-layer.9": 2398.394, "epoch": 0.52, "inbatch_neg_score": 0.4645, "inbatch_pos_score": 1.0391, "learning_rate": 1.1111111111111112e-05, "loss": 3.751, "norm_diff": 0.131, "norm_loss": 0.0, "num_token_doc": 66.6339, "num_token_overlap": 11.6209, "num_token_query": 31.1975, "num_token_union": 64.9594, "num_word_context": 201.7491, "num_word_doc": 49.7059, "num_word_query": 23.156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4091.4966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4631, "query_norm": 1.3652, "queue_k_norm": 1.4919, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.1975, "sent_len_1": 66.6339, "sent_len_max_0": 127.3325, "sent_len_max_1": 190.0675, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 29.4859, "dev_samples_per_second": 2.171, "dev_steps_per_second": 0.034, "epoch": 0.52, "step": 80000, "test_accuracy": 92.73681640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.40118059515953064, "test_doc_norm": 1.4506651163101196, "test_inbatch_neg_score": 0.7837334871292114, "test_inbatch_pos_score": 1.6643296480178833, "test_loss": 0.40118059515953064, "test_loss_align": 0.9305371046066284, "test_loss_unif": 3.598958969116211, "test_loss_unif_q@queue": 3.5989584922790527, "test_norm_diff": 0.011082645505666733, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4458089768886566, "test_query_norm": 1.4587392807006836, "test_queue_k_norm": 1.4923243522644043, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04131384938955307, "test_stdq": 0.04108937457203865, "test_stdqueue_k": 0.048134010285139084, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.4859, "dev_samples_per_second": 2.171, "dev_steps_per_second": 0.034, "epoch": 0.52, "eval_beir-arguana_ndcg@10": 0.36968, "eval_beir-arguana_recall@10": 0.63442, "eval_beir-arguana_recall@100": 0.93812, "eval_beir-arguana_recall@20": 0.75533, "eval_beir-avg_ndcg@10": 0.37785616666666666, "eval_beir-avg_recall@10": 0.4474389166666667, "eval_beir-avg_recall@100": 0.6354580833333332, "eval_beir-avg_recall@20": 0.5114034166666667, "eval_beir-cqadupstack_ndcg@10": 0.2579916666666667, "eval_beir-cqadupstack_recall@10": 0.3543691666666667, "eval_beir-cqadupstack_recall@100": 0.5892308333333333, "eval_beir-cqadupstack_recall@20": 0.4240941666666667, "eval_beir-fiqa_ndcg@10": 0.24394, "eval_beir-fiqa_recall@10": 0.30069, "eval_beir-fiqa_recall@100": 0.57582, "eval_beir-fiqa_recall@20": 0.38963, "eval_beir-nfcorpus_ndcg@10": 0.30366, "eval_beir-nfcorpus_recall@10": 0.1457, "eval_beir-nfcorpus_recall@100": 0.28799, "eval_beir-nfcorpus_recall@20": 0.18167, "eval_beir-nq_ndcg@10": 0.27773, "eval_beir-nq_recall@10": 0.45657, "eval_beir-nq_recall@100": 0.79961, "eval_beir-nq_recall@20": 0.58258, "eval_beir-quora_ndcg@10": 0.77085, "eval_beir-quora_recall@10": 0.88105, "eval_beir-quora_recall@100": 0.97695, "eval_beir-quora_recall@20": 0.92519, "eval_beir-scidocs_ndcg@10": 0.15148, "eval_beir-scidocs_recall@10": 0.15898, "eval_beir-scidocs_recall@100": 0.36477, "eval_beir-scidocs_recall@20": 0.22133, "eval_beir-scifact_ndcg@10": 0.64088, "eval_beir-scifact_recall@10": 0.79056, "eval_beir-scifact_recall@100": 0.91711, "eval_beir-scifact_recall@20": 0.848, "eval_beir-trec-covid_ndcg@10": 0.56342, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.4636, "eval_beir-trec-covid_recall@20": 0.584, "eval_beir-webis-touche2020_ndcg@10": 0.19893, "eval_beir-webis-touche2020_recall@10": 0.14005, "eval_beir-webis-touche2020_recall@100": 0.44138, "eval_beir-webis-touche2020_recall@20": 0.20221, "eval_senteval-avg_sts": 0.7638298472602096, "eval_senteval-sickr_spearman": 0.7318982119005693, "eval_senteval-stsb_spearman": 0.7957614826198499, "step": 80000, "test_accuracy": 92.73681640625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.40118059515953064, "test_doc_norm": 1.4506651163101196, "test_inbatch_neg_score": 0.7837334871292114, "test_inbatch_pos_score": 1.6643296480178833, "test_loss": 0.40118059515953064, "test_loss_align": 0.9305371046066284, "test_loss_unif": 3.598958969116211, "test_loss_unif_q@queue": 3.5989584922790527, "test_norm_diff": 0.011082645505666733, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4458089768886566, "test_query_norm": 1.4587392807006836, "test_queue_k_norm": 1.4923243522644043, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04131384938955307, "test_stdq": 0.04108937457203865, "test_stdqueue_k": 0.048134010285139084, "test_stdqueue_q": 0.0 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.7201, "doc_norm": 1.496, "encoder_q-embeddings": 2438.4661, "encoder_q-layer.0": 1616.1862, "encoder_q-layer.1": 1715.4905, "encoder_q-layer.10": 2466.0938, "encoder_q-layer.11": 6293.835, "encoder_q-layer.2": 1985.1655, "encoder_q-layer.3": 2082.3552, "encoder_q-layer.4": 2199.7549, "encoder_q-layer.5": 2185.8274, "encoder_q-layer.6": 2177.1582, "encoder_q-layer.7": 2262.0017, "encoder_q-layer.8": 2586.2322, "encoder_q-layer.9": 2348.3555, "epoch": 0.52, "inbatch_neg_score": 0.4635, "inbatch_pos_score": 1.0176, "learning_rate": 1.1055555555555556e-05, "loss": 3.7201, "norm_diff": 0.1317, "norm_loss": 0.0, "num_token_doc": 66.8415, "num_token_overlap": 11.7407, "num_token_query": 31.5687, "num_token_union": 65.195, "num_word_context": 202.5465, "num_word_doc": 49.866, "num_word_query": 23.47, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4256.0885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4641, "query_norm": 1.3644, "queue_k_norm": 1.4925, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5687, "sent_len_1": 66.8415, "sent_len_max_0": 127.4425, "sent_len_max_1": 190.6637, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7392, "doc_norm": 1.4978, "encoder_q-embeddings": 3330.2058, "encoder_q-layer.0": 2308.4883, "encoder_q-layer.1": 2752.179, "encoder_q-layer.10": 2632.2349, "encoder_q-layer.11": 6483.1777, "encoder_q-layer.2": 3281.4734, "encoder_q-layer.3": 3127.792, "encoder_q-layer.4": 3109.2207, "encoder_q-layer.5": 3089.0679, "encoder_q-layer.6": 2702.1021, "encoder_q-layer.7": 2657.1924, "encoder_q-layer.8": 2890.7336, "encoder_q-layer.9": 2413.5198, "epoch": 0.52, "inbatch_neg_score": 0.4647, "inbatch_pos_score": 1.0459, "learning_rate": 1.1000000000000001e-05, "loss": 3.7392, "norm_diff": 0.1214, "norm_loss": 0.0, "num_token_doc": 66.711, "num_token_overlap": 11.6271, "num_token_query": 31.2578, "num_token_union": 65.051, "num_word_context": 202.0493, "num_word_doc": 49.7539, "num_word_query": 23.1969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5021.1945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4644, "query_norm": 1.3764, "queue_k_norm": 1.4913, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2578, "sent_len_1": 66.711, "sent_len_max_0": 127.4975, "sent_len_max_1": 188.3088, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7222, "doc_norm": 1.5008, "encoder_q-embeddings": 3090.374, "encoder_q-layer.0": 2165.3699, "encoder_q-layer.1": 2455.9153, "encoder_q-layer.10": 2524.3352, "encoder_q-layer.11": 6305.4268, "encoder_q-layer.2": 2660.5876, "encoder_q-layer.3": 2879.2542, "encoder_q-layer.4": 3415.1592, "encoder_q-layer.5": 3855.4949, "encoder_q-layer.6": 3292.3752, "encoder_q-layer.7": 2905.8201, "encoder_q-layer.8": 2698.3752, "encoder_q-layer.9": 2454.0181, "epoch": 0.52, "inbatch_neg_score": 0.4679, "inbatch_pos_score": 1.0488, "learning_rate": 1.0944444444444445e-05, "loss": 3.7222, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.6996, "num_token_overlap": 11.6668, "num_token_query": 31.2981, "num_token_union": 65.0318, "num_word_context": 202.1954, "num_word_doc": 49.7875, "num_word_query": 23.2385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5053.1357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4668, "query_norm": 1.3838, "queue_k_norm": 1.493, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2981, "sent_len_1": 66.6996, "sent_len_max_0": 127.3962, "sent_len_max_1": 189.7337, "stdk": 0.0483, "stdq": 0.043, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7441, "doc_norm": 1.4914, "encoder_q-embeddings": 5857.4551, "encoder_q-layer.0": 4142.5034, "encoder_q-layer.1": 4732.3433, "encoder_q-layer.10": 2629.5295, "encoder_q-layer.11": 6221.3979, "encoder_q-layer.2": 5507.729, "encoder_q-layer.3": 5854.1299, "encoder_q-layer.4": 5797.0811, "encoder_q-layer.5": 5069.8833, "encoder_q-layer.6": 3740.1951, "encoder_q-layer.7": 2961.1541, "encoder_q-layer.8": 2696.6499, "encoder_q-layer.9": 2467.229, "epoch": 0.52, "inbatch_neg_score": 0.4687, "inbatch_pos_score": 1.0439, "learning_rate": 1.088888888888889e-05, "loss": 3.7441, "norm_diff": 0.1294, "norm_loss": 0.0, "num_token_doc": 66.8284, "num_token_overlap": 11.6819, "num_token_query": 31.4992, "num_token_union": 65.2198, "num_word_context": 202.0798, "num_word_doc": 49.827, "num_word_query": 23.3888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7080.8999, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.469, "query_norm": 1.362, "queue_k_norm": 1.4925, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4992, "sent_len_1": 66.8284, "sent_len_max_0": 127.3937, "sent_len_max_1": 191.4425, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7127, "doc_norm": 1.4888, "encoder_q-embeddings": 2563.9841, "encoder_q-layer.0": 1718.114, "encoder_q-layer.1": 1825.3007, "encoder_q-layer.10": 2375.2529, "encoder_q-layer.11": 6159.9336, "encoder_q-layer.2": 2123.9128, "encoder_q-layer.3": 2189.8145, "encoder_q-layer.4": 2331.3977, "encoder_q-layer.5": 2254.0872, "encoder_q-layer.6": 2159.7874, "encoder_q-layer.7": 2279.9744, "encoder_q-layer.8": 2493.8828, "encoder_q-layer.9": 2243.2446, "epoch": 0.52, "inbatch_neg_score": 0.4681, "inbatch_pos_score": 1.0469, "learning_rate": 1.0833333333333334e-05, "loss": 3.7127, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.755, "num_token_overlap": 11.6932, "num_token_query": 31.4364, "num_token_union": 65.0843, "num_word_context": 202.0973, "num_word_doc": 49.8103, "num_word_query": 23.347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4227.795, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4683, "query_norm": 1.3706, "queue_k_norm": 1.4929, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4364, "sent_len_1": 66.755, "sent_len_max_0": 127.51, "sent_len_max_1": 190.0788, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7365, "doc_norm": 1.4891, "encoder_q-embeddings": 2326.7151, "encoder_q-layer.0": 1590.676, "encoder_q-layer.1": 1712.6704, "encoder_q-layer.10": 2491.6514, "encoder_q-layer.11": 6210.9629, "encoder_q-layer.2": 2018.1035, "encoder_q-layer.3": 2086.6572, "encoder_q-layer.4": 2129.9719, "encoder_q-layer.5": 2133.0686, "encoder_q-layer.6": 2145.0137, "encoder_q-layer.7": 2185.1448, "encoder_q-layer.8": 2537.4885, "encoder_q-layer.9": 2271.4243, "epoch": 0.52, "inbatch_neg_score": 0.4705, "inbatch_pos_score": 1.0459, "learning_rate": 1.0777777777777778e-05, "loss": 3.7365, "norm_diff": 0.1184, "norm_loss": 0.0, "num_token_doc": 66.7664, "num_token_overlap": 11.6198, "num_token_query": 31.2647, "num_token_union": 65.0975, "num_word_context": 202.1496, "num_word_doc": 49.8198, "num_word_query": 23.2212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4124.0457, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4705, "query_norm": 1.3707, "queue_k_norm": 1.4932, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2647, "sent_len_1": 66.7664, "sent_len_max_0": 127.4813, "sent_len_max_1": 189.99, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.7329, "doc_norm": 1.4966, "encoder_q-embeddings": 2453.6399, "encoder_q-layer.0": 1678.834, "encoder_q-layer.1": 1904.01, "encoder_q-layer.10": 2391.0264, "encoder_q-layer.11": 6210.8623, "encoder_q-layer.2": 2232.175, "encoder_q-layer.3": 2360.7849, "encoder_q-layer.4": 2455.6799, "encoder_q-layer.5": 2230.3271, "encoder_q-layer.6": 2294.4817, "encoder_q-layer.7": 2386.1978, "encoder_q-layer.8": 2583.0938, "encoder_q-layer.9": 2336.6311, "epoch": 0.53, "inbatch_neg_score": 0.4676, "inbatch_pos_score": 1.0645, "learning_rate": 1.0722222222222222e-05, "loss": 3.7329, "norm_diff": 0.1297, "norm_loss": 0.0, "num_token_doc": 66.8134, "num_token_overlap": 11.7315, "num_token_query": 31.4804, "num_token_union": 65.1534, "num_word_context": 202.4882, "num_word_doc": 49.8794, "num_word_query": 23.3827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4337.4827, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4692, "query_norm": 1.3669, "queue_k_norm": 1.495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4804, "sent_len_1": 66.8134, "sent_len_max_0": 127.4488, "sent_len_max_1": 188.8575, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.726, "doc_norm": 1.494, "encoder_q-embeddings": 6280.5786, "encoder_q-layer.0": 4616.6309, "encoder_q-layer.1": 4895.1465, "encoder_q-layer.10": 2479.2981, "encoder_q-layer.11": 6101.5718, "encoder_q-layer.2": 4784.8735, "encoder_q-layer.3": 4439.7837, "encoder_q-layer.4": 3991.6101, "encoder_q-layer.5": 3357.0625, "encoder_q-layer.6": 2733.2019, "encoder_q-layer.7": 2841.3862, "encoder_q-layer.8": 2714.8469, "encoder_q-layer.9": 2300.9038, "epoch": 0.53, "inbatch_neg_score": 0.4668, "inbatch_pos_score": 1.0391, "learning_rate": 1.0666666666666667e-05, "loss": 3.726, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.7711, "num_token_overlap": 11.6816, "num_token_query": 31.528, "num_token_union": 65.1915, "num_word_context": 202.4264, "num_word_doc": 49.8969, "num_word_query": 23.4231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6535.5731, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4685, "query_norm": 1.374, "queue_k_norm": 1.4928, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.528, "sent_len_1": 66.7711, "sent_len_max_0": 127.5288, "sent_len_max_1": 188.1975, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7309, "doc_norm": 1.4985, "encoder_q-embeddings": 2183.4448, "encoder_q-layer.0": 1450.4341, "encoder_q-layer.1": 1513.6484, "encoder_q-layer.10": 2456.5752, "encoder_q-layer.11": 6491.2583, "encoder_q-layer.2": 1702.1577, "encoder_q-layer.3": 1770.4183, "encoder_q-layer.4": 1903.6947, "encoder_q-layer.5": 1868.4674, "encoder_q-layer.6": 1993.5619, "encoder_q-layer.7": 2201.4285, "encoder_q-layer.8": 2626.5972, "encoder_q-layer.9": 2343.2112, "epoch": 0.53, "inbatch_neg_score": 0.4682, "inbatch_pos_score": 1.0439, "learning_rate": 1.0611111111111111e-05, "loss": 3.7309, "norm_diff": 0.1295, "norm_loss": 0.0, "num_token_doc": 66.7826, "num_token_overlap": 11.6875, "num_token_query": 31.4156, "num_token_union": 65.1113, "num_word_context": 202.2663, "num_word_doc": 49.8199, "num_word_query": 23.3646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4205.4215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4685, "query_norm": 1.369, "queue_k_norm": 1.4938, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4156, "sent_len_1": 66.7826, "sent_len_max_0": 127.4887, "sent_len_max_1": 189.32, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7344, "doc_norm": 1.4886, "encoder_q-embeddings": 2528.0061, "encoder_q-layer.0": 1770.5298, "encoder_q-layer.1": 1887.5525, "encoder_q-layer.10": 2483.125, "encoder_q-layer.11": 6470.6504, "encoder_q-layer.2": 2100.2551, "encoder_q-layer.3": 2245.771, "encoder_q-layer.4": 2405.1565, "encoder_q-layer.5": 2526.6489, "encoder_q-layer.6": 2484.8889, "encoder_q-layer.7": 2566.1714, "encoder_q-layer.8": 2660.2422, "encoder_q-layer.9": 2344.2004, "epoch": 0.53, "inbatch_neg_score": 0.4685, "inbatch_pos_score": 1.0449, "learning_rate": 1.0555555555555555e-05, "loss": 3.7344, "norm_diff": 0.1215, "norm_loss": 0.0, "num_token_doc": 66.6284, "num_token_overlap": 11.6258, "num_token_query": 31.373, "num_token_union": 65.0388, "num_word_context": 201.9508, "num_word_doc": 49.7093, "num_word_query": 23.3096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4438.1069, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4688, "query_norm": 1.3671, "queue_k_norm": 1.4954, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.373, "sent_len_1": 66.6284, "sent_len_max_0": 127.62, "sent_len_max_1": 189.7525, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7438, "doc_norm": 1.4921, "encoder_q-embeddings": 2877.5002, "encoder_q-layer.0": 1960.4498, "encoder_q-layer.1": 2131.0869, "encoder_q-layer.10": 2701.583, "encoder_q-layer.11": 6365.2036, "encoder_q-layer.2": 2447.5818, "encoder_q-layer.3": 2489.8069, "encoder_q-layer.4": 2734.3389, "encoder_q-layer.5": 2674.6921, "encoder_q-layer.6": 2541.4771, "encoder_q-layer.7": 2517.5327, "encoder_q-layer.8": 2701.5256, "encoder_q-layer.9": 2389.5632, "epoch": 0.53, "inbatch_neg_score": 0.4664, "inbatch_pos_score": 1.0156, "learning_rate": 1.05e-05, "loss": 3.7438, "norm_diff": 0.1394, "norm_loss": 0.0, "num_token_doc": 66.6329, "num_token_overlap": 11.6676, "num_token_query": 31.325, "num_token_union": 65.0832, "num_word_context": 202.267, "num_word_doc": 49.7485, "num_word_query": 23.2649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4669.2735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.467, "query_norm": 1.3527, "queue_k_norm": 1.4944, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.325, "sent_len_1": 66.6329, "sent_len_max_0": 127.26, "sent_len_max_1": 188.0188, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7243, "doc_norm": 1.4924, "encoder_q-embeddings": 5077.3486, "encoder_q-layer.0": 3087.5981, "encoder_q-layer.1": 3204.5251, "encoder_q-layer.10": 2619.168, "encoder_q-layer.11": 6370.1226, "encoder_q-layer.2": 2568.2908, "encoder_q-layer.3": 2138.2239, "encoder_q-layer.4": 2204.9304, "encoder_q-layer.5": 2255.7356, "encoder_q-layer.6": 2280.8274, "encoder_q-layer.7": 2330.4358, "encoder_q-layer.8": 2467.7651, "encoder_q-layer.9": 2256.5574, "epoch": 0.53, "inbatch_neg_score": 0.4683, "inbatch_pos_score": 1.0381, "learning_rate": 1.0444444444444445e-05, "loss": 3.7243, "norm_diff": 0.1213, "norm_loss": 0.0, "num_token_doc": 67.0039, "num_token_overlap": 11.6943, "num_token_query": 31.3829, "num_token_union": 65.2227, "num_word_context": 202.0756, "num_word_doc": 49.9858, "num_word_query": 23.2973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5180.4725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4683, "query_norm": 1.3711, "queue_k_norm": 1.4941, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3829, "sent_len_1": 67.0039, "sent_len_max_0": 127.435, "sent_len_max_1": 190.4112, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.7242, "doc_norm": 1.4913, "encoder_q-embeddings": 1128.4008, "encoder_q-layer.0": 717.1568, "encoder_q-layer.1": 756.3575, "encoder_q-layer.10": 1302.5029, "encoder_q-layer.11": 3157.8372, "encoder_q-layer.2": 848.7592, "encoder_q-layer.3": 897.6948, "encoder_q-layer.4": 944.553, "encoder_q-layer.5": 949.2663, "encoder_q-layer.6": 1051.6605, "encoder_q-layer.7": 1160.1588, "encoder_q-layer.8": 1274.4694, "encoder_q-layer.9": 1177.1285, "epoch": 0.53, "inbatch_neg_score": 0.4701, "inbatch_pos_score": 1.0215, "learning_rate": 1.038888888888889e-05, "loss": 3.7242, "norm_diff": 0.1235, "norm_loss": 0.0, "num_token_doc": 66.9297, "num_token_overlap": 11.7072, "num_token_query": 31.4963, "num_token_union": 65.2015, "num_word_context": 202.7946, "num_word_doc": 49.9285, "num_word_query": 23.4028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2074.2846, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4702, "query_norm": 1.3678, "queue_k_norm": 1.4928, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4963, "sent_len_1": 66.9297, "sent_len_max_0": 127.4313, "sent_len_max_1": 191.3288, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7361, "doc_norm": 1.4878, "encoder_q-embeddings": 1270.6327, "encoder_q-layer.0": 837.5146, "encoder_q-layer.1": 903.8858, "encoder_q-layer.10": 1243.324, "encoder_q-layer.11": 3309.3528, "encoder_q-layer.2": 1053.9862, "encoder_q-layer.3": 1128.5267, "encoder_q-layer.4": 1140.4489, "encoder_q-layer.5": 1136.751, "encoder_q-layer.6": 1130.3405, "encoder_q-layer.7": 1176.9785, "encoder_q-layer.8": 1317.9332, "encoder_q-layer.9": 1205.8182, "epoch": 0.53, "inbatch_neg_score": 0.4704, "inbatch_pos_score": 1.0332, "learning_rate": 1.0333333333333333e-05, "loss": 3.7361, "norm_diff": 0.1324, "norm_loss": 0.0, "num_token_doc": 66.7588, "num_token_overlap": 11.6901, "num_token_query": 31.3007, "num_token_union": 65.0584, "num_word_context": 202.3202, "num_word_doc": 49.8474, "num_word_query": 23.2261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2228.1661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.47, "query_norm": 1.3554, "queue_k_norm": 1.4946, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3007, "sent_len_1": 66.7588, "sent_len_max_0": 127.5713, "sent_len_max_1": 188.9025, "stdk": 0.0478, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7068, "doc_norm": 1.5011, "encoder_q-embeddings": 1264.213, "encoder_q-layer.0": 887.178, "encoder_q-layer.1": 981.8523, "encoder_q-layer.10": 1267.2811, "encoder_q-layer.11": 3201.8574, "encoder_q-layer.2": 1125.9398, "encoder_q-layer.3": 1058.0071, "encoder_q-layer.4": 1106.9873, "encoder_q-layer.5": 1109.1787, "encoder_q-layer.6": 1112.5471, "encoder_q-layer.7": 1133.2676, "encoder_q-layer.8": 1293.8717, "encoder_q-layer.9": 1202.3157, "epoch": 0.53, "inbatch_neg_score": 0.4707, "inbatch_pos_score": 1.0361, "learning_rate": 1.0277777777777777e-05, "loss": 3.7068, "norm_diff": 0.135, "norm_loss": 0.0, "num_token_doc": 66.6137, "num_token_overlap": 11.6728, "num_token_query": 31.3117, "num_token_union": 64.975, "num_word_context": 202.235, "num_word_doc": 49.7322, "num_word_query": 23.2618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2177.6005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.366, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3117, "sent_len_1": 66.6137, "sent_len_max_0": 127.56, "sent_len_max_1": 189.07, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7186, "doc_norm": 1.4949, "encoder_q-embeddings": 1267.5969, "encoder_q-layer.0": 926.7126, "encoder_q-layer.1": 1000.911, "encoder_q-layer.10": 1181.7511, "encoder_q-layer.11": 3068.4116, "encoder_q-layer.2": 1252.1638, "encoder_q-layer.3": 1175.6542, "encoder_q-layer.4": 1156.717, "encoder_q-layer.5": 1125.2722, "encoder_q-layer.6": 1043.0254, "encoder_q-layer.7": 1195.7791, "encoder_q-layer.8": 1315.9686, "encoder_q-layer.9": 1107.6935, "epoch": 0.53, "inbatch_neg_score": 0.4729, "inbatch_pos_score": 1.042, "learning_rate": 1.0222222222222223e-05, "loss": 3.7186, "norm_diff": 0.1307, "norm_loss": 0.0, "num_token_doc": 66.6398, "num_token_overlap": 11.727, "num_token_query": 31.4313, "num_token_union": 65.0604, "num_word_context": 202.387, "num_word_doc": 49.7257, "num_word_query": 23.351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2161.019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4731, "query_norm": 1.3642, "queue_k_norm": 1.4949, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4313, "sent_len_1": 66.6398, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.195, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7264, "doc_norm": 1.4936, "encoder_q-embeddings": 2660.26, "encoder_q-layer.0": 2125.022, "encoder_q-layer.1": 2293.0889, "encoder_q-layer.10": 1201.9392, "encoder_q-layer.11": 2971.1582, "encoder_q-layer.2": 2984.7727, "encoder_q-layer.3": 2448.1357, "encoder_q-layer.4": 2414.071, "encoder_q-layer.5": 1863.6807, "encoder_q-layer.6": 1592.321, "encoder_q-layer.7": 1349.0323, "encoder_q-layer.8": 1394.9489, "encoder_q-layer.9": 1216.0469, "epoch": 0.53, "inbatch_neg_score": 0.4738, "inbatch_pos_score": 1.0674, "learning_rate": 1.0166666666666667e-05, "loss": 3.7264, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.8216, "num_token_overlap": 11.6768, "num_token_query": 31.3952, "num_token_union": 65.1267, "num_word_context": 202.417, "num_word_doc": 49.9158, "num_word_query": 23.3315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3285.87, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4746, "query_norm": 1.3845, "queue_k_norm": 1.4957, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3952, "sent_len_1": 66.8216, "sent_len_max_0": 127.5187, "sent_len_max_1": 188.1125, "stdk": 0.048, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7148, "doc_norm": 1.494, "encoder_q-embeddings": 1332.2677, "encoder_q-layer.0": 936.0366, "encoder_q-layer.1": 948.6878, "encoder_q-layer.10": 1239.5238, "encoder_q-layer.11": 3119.1709, "encoder_q-layer.2": 1094.5713, "encoder_q-layer.3": 1151.5585, "encoder_q-layer.4": 1113.589, "encoder_q-layer.5": 1095.1237, "encoder_q-layer.6": 1178.833, "encoder_q-layer.7": 1209.9747, "encoder_q-layer.8": 1379.224, "encoder_q-layer.9": 1200.7041, "epoch": 0.53, "inbatch_neg_score": 0.4819, "inbatch_pos_score": 1.0635, "learning_rate": 1.0111111111111111e-05, "loss": 3.7148, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.8881, "num_token_overlap": 11.6986, "num_token_query": 31.4063, "num_token_union": 65.1427, "num_word_context": 202.2531, "num_word_doc": 49.8789, "num_word_query": 23.3355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2190.1147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4802, "query_norm": 1.3982, "queue_k_norm": 1.4941, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4063, "sent_len_1": 66.8881, "sent_len_max_0": 127.4775, "sent_len_max_1": 190.3862, "stdk": 0.0479, "stdq": 0.0433, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7418, "doc_norm": 1.502, "encoder_q-embeddings": 1541.0482, "encoder_q-layer.0": 1113.6268, "encoder_q-layer.1": 1294.9573, "encoder_q-layer.10": 1280.6462, "encoder_q-layer.11": 3129.9443, "encoder_q-layer.2": 1407.8282, "encoder_q-layer.3": 1459.8611, "encoder_q-layer.4": 1668.0798, "encoder_q-layer.5": 1929.47, "encoder_q-layer.6": 1642.421, "encoder_q-layer.7": 1523.0461, "encoder_q-layer.8": 1469.8153, "encoder_q-layer.9": 1172.8225, "epoch": 0.53, "inbatch_neg_score": 0.4814, "inbatch_pos_score": 1.0645, "learning_rate": 1.0055555555555555e-05, "loss": 3.7418, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.7973, "num_token_overlap": 11.6512, "num_token_query": 31.3712, "num_token_union": 65.1273, "num_word_context": 202.282, "num_word_doc": 49.8129, "num_word_query": 23.3014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2551.3557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4802, "query_norm": 1.3742, "queue_k_norm": 1.4944, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3712, "sent_len_1": 66.7973, "sent_len_max_0": 127.5337, "sent_len_max_1": 191.2388, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7465, "doc_norm": 1.4995, "encoder_q-embeddings": 1493.1252, "encoder_q-layer.0": 1076.2885, "encoder_q-layer.1": 1185.0652, "encoder_q-layer.10": 1397.8608, "encoder_q-layer.11": 3420.0239, "encoder_q-layer.2": 1515.208, "encoder_q-layer.3": 1529.0594, "encoder_q-layer.4": 1490.0101, "encoder_q-layer.5": 1529.0026, "encoder_q-layer.6": 1721.181, "encoder_q-layer.7": 1599.8584, "encoder_q-layer.8": 1532.7184, "encoder_q-layer.9": 1343.0834, "epoch": 0.53, "inbatch_neg_score": 0.4838, "inbatch_pos_score": 1.042, "learning_rate": 1e-05, "loss": 3.7465, "norm_diff": 0.1332, "norm_loss": 0.0, "num_token_doc": 66.5675, "num_token_overlap": 11.6646, "num_token_query": 31.3514, "num_token_union": 64.9749, "num_word_context": 202.2642, "num_word_doc": 49.69, "num_word_query": 23.2879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2563.451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4834, "query_norm": 1.3663, "queue_k_norm": 1.4935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3514, "sent_len_1": 66.5675, "sent_len_max_0": 127.5125, "sent_len_max_1": 187.8562, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7243, "doc_norm": 1.4916, "encoder_q-embeddings": 1493.6415, "encoder_q-layer.0": 1038.3087, "encoder_q-layer.1": 1079.2963, "encoder_q-layer.10": 1252.7542, "encoder_q-layer.11": 3218.5276, "encoder_q-layer.2": 1268.6964, "encoder_q-layer.3": 1293.6926, "encoder_q-layer.4": 1390.7329, "encoder_q-layer.5": 1533.3521, "encoder_q-layer.6": 1514.3545, "encoder_q-layer.7": 1419.9309, "encoder_q-layer.8": 1407.9047, "encoder_q-layer.9": 1196.1907, "epoch": 0.53, "inbatch_neg_score": 0.4821, "inbatch_pos_score": 1.0449, "learning_rate": 9.944444444444445e-06, "loss": 3.7243, "norm_diff": 0.1146, "norm_loss": 0.0, "num_token_doc": 66.6292, "num_token_overlap": 11.6309, "num_token_query": 31.2605, "num_token_union": 64.9893, "num_word_context": 202.1885, "num_word_doc": 49.7521, "num_word_query": 23.2173, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2433.4819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4817, "query_norm": 1.377, "queue_k_norm": 1.4955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2605, "sent_len_1": 66.6292, "sent_len_max_0": 127.5012, "sent_len_max_1": 187.2175, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7067, "doc_norm": 1.4891, "encoder_q-embeddings": 4164.0005, "encoder_q-layer.0": 3136.0413, "encoder_q-layer.1": 3294.72, "encoder_q-layer.10": 1265.0214, "encoder_q-layer.11": 3236.3909, "encoder_q-layer.2": 4176.4243, "encoder_q-layer.3": 4278.625, "encoder_q-layer.4": 4097.0361, "encoder_q-layer.5": 3277.4031, "encoder_q-layer.6": 2830.2817, "encoder_q-layer.7": 2357.1719, "encoder_q-layer.8": 1750.2151, "encoder_q-layer.9": 1311.681, "epoch": 0.54, "inbatch_neg_score": 0.4847, "inbatch_pos_score": 1.0596, "learning_rate": 9.888888888888889e-06, "loss": 3.7067, "norm_diff": 0.1144, "norm_loss": 0.0, "num_token_doc": 66.6913, "num_token_overlap": 11.6269, "num_token_query": 31.2943, "num_token_union": 65.077, "num_word_context": 202.1693, "num_word_doc": 49.7938, "num_word_query": 23.2398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4986.2597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4839, "query_norm": 1.3747, "queue_k_norm": 1.4965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2943, "sent_len_1": 66.6913, "sent_len_max_0": 127.5125, "sent_len_max_1": 189.0625, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7311, "doc_norm": 1.4925, "encoder_q-embeddings": 1022.6661, "encoder_q-layer.0": 644.5448, "encoder_q-layer.1": 667.5311, "encoder_q-layer.10": 1291.1943, "encoder_q-layer.11": 3078.8096, "encoder_q-layer.2": 756.4532, "encoder_q-layer.3": 814.522, "encoder_q-layer.4": 864.838, "encoder_q-layer.5": 858.0389, "encoder_q-layer.6": 957.8554, "encoder_q-layer.7": 1061.0723, "encoder_q-layer.8": 1280.0045, "encoder_q-layer.9": 1203.1316, "epoch": 0.54, "inbatch_neg_score": 0.484, "inbatch_pos_score": 1.0684, "learning_rate": 9.833333333333333e-06, "loss": 3.7311, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 66.6987, "num_token_overlap": 11.672, "num_token_query": 31.4679, "num_token_union": 65.1079, "num_word_context": 202.0234, "num_word_doc": 49.7387, "num_word_query": 23.3607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1972.7968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4858, "query_norm": 1.3811, "queue_k_norm": 1.4957, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4679, "sent_len_1": 66.6987, "sent_len_max_0": 127.345, "sent_len_max_1": 189.23, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7178, "doc_norm": 1.4981, "encoder_q-embeddings": 3303.5076, "encoder_q-layer.0": 2190.9917, "encoder_q-layer.1": 2344.5964, "encoder_q-layer.10": 1288.0569, "encoder_q-layer.11": 3372.3921, "encoder_q-layer.2": 2602.8096, "encoder_q-layer.3": 2804.8647, "encoder_q-layer.4": 2992.8972, "encoder_q-layer.5": 2954.2146, "encoder_q-layer.6": 2676.7874, "encoder_q-layer.7": 2375.7917, "encoder_q-layer.8": 2326.3157, "encoder_q-layer.9": 1417.3618, "epoch": 0.54, "inbatch_neg_score": 0.4862, "inbatch_pos_score": 1.0596, "learning_rate": 9.777777777777779e-06, "loss": 3.7178, "norm_diff": 0.1234, "norm_loss": 0.0, "num_token_doc": 66.898, "num_token_overlap": 11.6835, "num_token_query": 31.4219, "num_token_union": 65.1912, "num_word_context": 202.5626, "num_word_doc": 49.9318, "num_word_query": 23.3358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3999.8953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4873, "query_norm": 1.3747, "queue_k_norm": 1.4964, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4219, "sent_len_1": 66.898, "sent_len_max_0": 127.54, "sent_len_max_1": 188.4338, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7445, "doc_norm": 1.5017, "encoder_q-embeddings": 2759.0225, "encoder_q-layer.0": 2002.0017, "encoder_q-layer.1": 2130.0769, "encoder_q-layer.10": 1273.1631, "encoder_q-layer.11": 3104.1804, "encoder_q-layer.2": 2439.5168, "encoder_q-layer.3": 2396.7869, "encoder_q-layer.4": 2336.7664, "encoder_q-layer.5": 2381.3518, "encoder_q-layer.6": 2051.0796, "encoder_q-layer.7": 1546.8014, "encoder_q-layer.8": 1508.1801, "encoder_q-layer.9": 1175.838, "epoch": 0.54, "inbatch_neg_score": 0.4908, "inbatch_pos_score": 1.0566, "learning_rate": 9.722222222222223e-06, "loss": 3.7445, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.7827, "num_token_overlap": 11.6435, "num_token_query": 31.4395, "num_token_union": 65.1928, "num_word_context": 202.1661, "num_word_doc": 49.8011, "num_word_query": 23.3609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3316.8166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4905, "query_norm": 1.3822, "queue_k_norm": 1.4973, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4395, "sent_len_1": 66.7827, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.0387, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7289, "doc_norm": 1.4969, "encoder_q-embeddings": 1203.8392, "encoder_q-layer.0": 806.1071, "encoder_q-layer.1": 863.0941, "encoder_q-layer.10": 1287.7064, "encoder_q-layer.11": 3202.1064, "encoder_q-layer.2": 979.7792, "encoder_q-layer.3": 989.0372, "encoder_q-layer.4": 970.3325, "encoder_q-layer.5": 967.5369, "encoder_q-layer.6": 1053.6924, "encoder_q-layer.7": 1118.8916, "encoder_q-layer.8": 1250.7794, "encoder_q-layer.9": 1173.6305, "epoch": 0.54, "inbatch_neg_score": 0.4891, "inbatch_pos_score": 1.0654, "learning_rate": 9.666666666666667e-06, "loss": 3.7289, "norm_diff": 0.1083, "norm_loss": 0.0, "num_token_doc": 66.8193, "num_token_overlap": 11.6757, "num_token_query": 31.4858, "num_token_union": 65.1994, "num_word_context": 202.2478, "num_word_doc": 49.8277, "num_word_query": 23.3815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2122.9054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.49, "query_norm": 1.3886, "queue_k_norm": 1.4962, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4858, "sent_len_1": 66.8193, "sent_len_max_0": 127.6188, "sent_len_max_1": 190.845, "stdk": 0.048, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7253, "doc_norm": 1.4973, "encoder_q-embeddings": 3515.1812, "encoder_q-layer.0": 2439.4734, "encoder_q-layer.1": 2878.6292, "encoder_q-layer.10": 1350.2286, "encoder_q-layer.11": 3407.1421, "encoder_q-layer.2": 3315.6577, "encoder_q-layer.3": 3370.262, "encoder_q-layer.4": 3202.2664, "encoder_q-layer.5": 2908.8684, "encoder_q-layer.6": 2918.4124, "encoder_q-layer.7": 2134.4939, "encoder_q-layer.8": 1975.0513, "encoder_q-layer.9": 1329.7461, "epoch": 0.54, "inbatch_neg_score": 0.4934, "inbatch_pos_score": 1.0518, "learning_rate": 9.61111111111111e-06, "loss": 3.7253, "norm_diff": 0.1265, "norm_loss": 0.0, "num_token_doc": 66.974, "num_token_overlap": 11.6826, "num_token_query": 31.3237, "num_token_union": 65.2011, "num_word_context": 202.53, "num_word_doc": 50.0067, "num_word_query": 23.2501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4256.0574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4932, "query_norm": 1.3708, "queue_k_norm": 1.4983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3237, "sent_len_1": 66.974, "sent_len_max_0": 127.5125, "sent_len_max_1": 188.7837, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.7215, "doc_norm": 1.4931, "encoder_q-embeddings": 1013.3691, "encoder_q-layer.0": 686.6143, "encoder_q-layer.1": 690.9271, "encoder_q-layer.10": 1353.047, "encoder_q-layer.11": 3344.5837, "encoder_q-layer.2": 778.3128, "encoder_q-layer.3": 790.4056, "encoder_q-layer.4": 815.5397, "encoder_q-layer.5": 811.3982, "encoder_q-layer.6": 912.0907, "encoder_q-layer.7": 1003.9987, "encoder_q-layer.8": 1235.5054, "encoder_q-layer.9": 1208.9496, "epoch": 0.54, "inbatch_neg_score": 0.492, "inbatch_pos_score": 1.0439, "learning_rate": 9.555555555555556e-06, "loss": 3.7215, "norm_diff": 0.1217, "norm_loss": 0.0, "num_token_doc": 66.654, "num_token_overlap": 11.7325, "num_token_query": 31.5177, "num_token_union": 65.1159, "num_word_context": 202.4532, "num_word_doc": 49.7345, "num_word_query": 23.4167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2049.3216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4924, "query_norm": 1.3714, "queue_k_norm": 1.4998, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5177, "sent_len_1": 66.654, "sent_len_max_0": 127.5288, "sent_len_max_1": 187.19, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7024, "doc_norm": 1.5002, "encoder_q-embeddings": 2830.7756, "encoder_q-layer.0": 2073.6575, "encoder_q-layer.1": 2392.0044, "encoder_q-layer.10": 1341.4185, "encoder_q-layer.11": 3165.7314, "encoder_q-layer.2": 2607.6606, "encoder_q-layer.3": 2809.7766, "encoder_q-layer.4": 3378.2942, "encoder_q-layer.5": 3381.426, "encoder_q-layer.6": 3324.0806, "encoder_q-layer.7": 2624.4895, "encoder_q-layer.8": 2015.7036, "encoder_q-layer.9": 1298.5953, "epoch": 0.54, "inbatch_neg_score": 0.4931, "inbatch_pos_score": 1.0732, "learning_rate": 9.5e-06, "loss": 3.7024, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.6431, "num_token_overlap": 11.7237, "num_token_query": 31.4836, "num_token_union": 65.0531, "num_word_context": 202.0555, "num_word_doc": 49.7402, "num_word_query": 23.3937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4017.7766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4934, "query_norm": 1.3843, "queue_k_norm": 1.4988, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4836, "sent_len_1": 66.6431, "sent_len_max_0": 127.5837, "sent_len_max_1": 187.7138, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7097, "doc_norm": 1.4922, "encoder_q-embeddings": 1728.402, "encoder_q-layer.0": 1188.9602, "encoder_q-layer.1": 1400.5901, "encoder_q-layer.10": 1244.6973, "encoder_q-layer.11": 3168.9546, "encoder_q-layer.2": 1681.7042, "encoder_q-layer.3": 1773.8865, "encoder_q-layer.4": 1834.9082, "encoder_q-layer.5": 1792.3219, "encoder_q-layer.6": 1808.1732, "encoder_q-layer.7": 1757.6527, "encoder_q-layer.8": 1624.0795, "encoder_q-layer.9": 1257.5659, "epoch": 0.54, "inbatch_neg_score": 0.4961, "inbatch_pos_score": 1.0723, "learning_rate": 9.444444444444445e-06, "loss": 3.7097, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 66.7669, "num_token_overlap": 11.7151, "num_token_query": 31.523, "num_token_union": 65.1362, "num_word_context": 202.2053, "num_word_doc": 49.8148, "num_word_query": 23.4082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2708.0937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4954, "query_norm": 1.3832, "queue_k_norm": 1.5002, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.523, "sent_len_1": 66.7669, "sent_len_max_0": 127.7, "sent_len_max_1": 189.8837, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.7198, "doc_norm": 1.4965, "encoder_q-embeddings": 2146.4805, "encoder_q-layer.0": 1664.7601, "encoder_q-layer.1": 1703.0902, "encoder_q-layer.10": 1248.7887, "encoder_q-layer.11": 3217.8879, "encoder_q-layer.2": 1968.1694, "encoder_q-layer.3": 1759.2681, "encoder_q-layer.4": 1728.4622, "encoder_q-layer.5": 1672.8363, "encoder_q-layer.6": 1742.8358, "encoder_q-layer.7": 1461.6436, "encoder_q-layer.8": 1318.9796, "encoder_q-layer.9": 1159.4237, "epoch": 0.54, "inbatch_neg_score": 0.4956, "inbatch_pos_score": 1.0684, "learning_rate": 9.388888888888889e-06, "loss": 3.7198, "norm_diff": 0.1137, "norm_loss": 0.0, "num_token_doc": 66.7348, "num_token_overlap": 11.728, "num_token_query": 31.4881, "num_token_union": 65.1236, "num_word_context": 202.0943, "num_word_doc": 49.8485, "num_word_query": 23.3987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2844.0619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4956, "query_norm": 1.3828, "queue_k_norm": 1.4968, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4881, "sent_len_1": 66.7348, "sent_len_max_0": 127.5225, "sent_len_max_1": 188.1962, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7311, "doc_norm": 1.4965, "encoder_q-embeddings": 715.3224, "encoder_q-layer.0": 495.9883, "encoder_q-layer.1": 560.4847, "encoder_q-layer.10": 611.8735, "encoder_q-layer.11": 1569.0184, "encoder_q-layer.2": 649.1172, "encoder_q-layer.3": 760.9839, "encoder_q-layer.4": 768.8251, "encoder_q-layer.5": 736.7048, "encoder_q-layer.6": 802.9972, "encoder_q-layer.7": 742.3942, "encoder_q-layer.8": 721.7792, "encoder_q-layer.9": 626.0439, "epoch": 0.54, "inbatch_neg_score": 0.4985, "inbatch_pos_score": 1.0664, "learning_rate": 9.333333333333334e-06, "loss": 3.7311, "norm_diff": 0.1173, "norm_loss": 0.0, "num_token_doc": 66.5054, "num_token_overlap": 11.6597, "num_token_query": 31.3383, "num_token_union": 64.9488, "num_word_context": 202.0379, "num_word_doc": 49.6134, "num_word_query": 23.2676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1216.9963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4988, "query_norm": 1.3792, "queue_k_norm": 1.4988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3383, "sent_len_1": 66.5054, "sent_len_max_0": 127.3275, "sent_len_max_1": 189.53, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7041, "doc_norm": 1.5022, "encoder_q-embeddings": 1220.4614, "encoder_q-layer.0": 881.5247, "encoder_q-layer.1": 931.9487, "encoder_q-layer.10": 635.2179, "encoder_q-layer.11": 1603.6689, "encoder_q-layer.2": 1032.1636, "encoder_q-layer.3": 1144.0138, "encoder_q-layer.4": 1475.3127, "encoder_q-layer.5": 1543.2448, "encoder_q-layer.6": 1426.0764, "encoder_q-layer.7": 1011.0767, "encoder_q-layer.8": 930.5529, "encoder_q-layer.9": 659.7591, "epoch": 0.54, "inbatch_neg_score": 0.4987, "inbatch_pos_score": 1.0566, "learning_rate": 9.277777777777778e-06, "loss": 3.7041, "norm_diff": 0.1187, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 11.6919, "num_token_query": 31.3544, "num_token_union": 65.1635, "num_word_context": 202.3241, "num_word_doc": 49.8654, "num_word_query": 23.2742, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1756.475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4988, "query_norm": 1.3835, "queue_k_norm": 1.4987, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3544, "sent_len_1": 66.8156, "sent_len_max_0": 127.3713, "sent_len_max_1": 190.075, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.7317, "doc_norm": 1.4969, "encoder_q-embeddings": 621.3972, "encoder_q-layer.0": 413.7391, "encoder_q-layer.1": 451.0154, "encoder_q-layer.10": 676.9672, "encoder_q-layer.11": 1652.4946, "encoder_q-layer.2": 518.0851, "encoder_q-layer.3": 538.9772, "encoder_q-layer.4": 595.1934, "encoder_q-layer.5": 632.9617, "encoder_q-layer.6": 689.4113, "encoder_q-layer.7": 712.7968, "encoder_q-layer.8": 712.2574, "encoder_q-layer.9": 598.2424, "epoch": 0.54, "inbatch_neg_score": 0.5025, "inbatch_pos_score": 1.0566, "learning_rate": 9.222222222222222e-06, "loss": 3.7317, "norm_diff": 0.112, "norm_loss": 0.0, "num_token_doc": 66.742, "num_token_overlap": 11.6806, "num_token_query": 31.416, "num_token_union": 65.1253, "num_word_context": 202.5545, "num_word_doc": 49.8551, "num_word_query": 23.3464, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1149.418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.502, "query_norm": 1.3849, "queue_k_norm": 1.5009, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.416, "sent_len_1": 66.742, "sent_len_max_0": 127.4625, "sent_len_max_1": 189.3162, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.7143, "doc_norm": 1.5028, "encoder_q-embeddings": 531.8719, "encoder_q-layer.0": 364.4898, "encoder_q-layer.1": 384.984, "encoder_q-layer.10": 611.8698, "encoder_q-layer.11": 1604.5518, "encoder_q-layer.2": 435.9399, "encoder_q-layer.3": 451.5915, "encoder_q-layer.4": 463.1246, "encoder_q-layer.5": 467.7743, "encoder_q-layer.6": 497.3086, "encoder_q-layer.7": 526.9418, "encoder_q-layer.8": 610.8999, "encoder_q-layer.9": 581.2803, "epoch": 0.54, "inbatch_neg_score": 0.5016, "inbatch_pos_score": 1.082, "learning_rate": 9.166666666666666e-06, "loss": 3.7143, "norm_diff": 0.1324, "norm_loss": 0.0, "num_token_doc": 66.8606, "num_token_overlap": 11.6875, "num_token_query": 31.3224, "num_token_union": 65.14, "num_word_context": 202.7644, "num_word_doc": 49.9292, "num_word_query": 23.2624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1038.4835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5029, "query_norm": 1.3703, "queue_k_norm": 1.5007, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3224, "sent_len_1": 66.8606, "sent_len_max_0": 127.48, "sent_len_max_1": 188.9475, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.7359, "doc_norm": 1.4895, "encoder_q-embeddings": 602.7652, "encoder_q-layer.0": 428.0992, "encoder_q-layer.1": 451.1904, "encoder_q-layer.10": 649.2736, "encoder_q-layer.11": 1649.679, "encoder_q-layer.2": 512.7186, "encoder_q-layer.3": 533.1053, "encoder_q-layer.4": 545.5175, "encoder_q-layer.5": 555.543, "encoder_q-layer.6": 590.2699, "encoder_q-layer.7": 603.5599, "encoder_q-layer.8": 647.9835, "encoder_q-layer.9": 584.3253, "epoch": 0.54, "inbatch_neg_score": 0.5047, "inbatch_pos_score": 1.0469, "learning_rate": 9.111111111111112e-06, "loss": 3.7359, "norm_diff": 0.1154, "norm_loss": 0.0, "num_token_doc": 66.8948, "num_token_overlap": 11.6372, "num_token_query": 31.3096, "num_token_union": 65.15, "num_word_context": 201.9265, "num_word_doc": 49.8392, "num_word_query": 23.2557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1107.0743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5034, "query_norm": 1.3741, "queue_k_norm": 1.5001, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3096, "sent_len_1": 66.8948, "sent_len_max_0": 127.415, "sent_len_max_1": 191.1337, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.715, "doc_norm": 1.5004, "encoder_q-embeddings": 585.3604, "encoder_q-layer.0": 390.1333, "encoder_q-layer.1": 424.1084, "encoder_q-layer.10": 644.0779, "encoder_q-layer.11": 1599.6503, "encoder_q-layer.2": 485.7189, "encoder_q-layer.3": 518.4705, "encoder_q-layer.4": 543.1541, "encoder_q-layer.5": 514.7812, "encoder_q-layer.6": 549.4202, "encoder_q-layer.7": 599.2012, "encoder_q-layer.8": 660.6512, "encoder_q-layer.9": 580.9126, "epoch": 0.54, "inbatch_neg_score": 0.5045, "inbatch_pos_score": 1.0693, "learning_rate": 9.055555555555556e-06, "loss": 3.715, "norm_diff": 0.125, "norm_loss": 0.0, "num_token_doc": 66.9946, "num_token_overlap": 11.7082, "num_token_query": 31.384, "num_token_union": 65.2394, "num_word_context": 202.3542, "num_word_doc": 49.9916, "num_word_query": 23.3032, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.6378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5029, "query_norm": 1.3754, "queue_k_norm": 1.5011, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.384, "sent_len_1": 66.9946, "sent_len_max_0": 127.48, "sent_len_max_1": 190.035, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7249, "doc_norm": 1.5041, "encoder_q-embeddings": 780.2137, "encoder_q-layer.0": 517.9286, "encoder_q-layer.1": 558.6597, "encoder_q-layer.10": 643.9849, "encoder_q-layer.11": 1662.9674, "encoder_q-layer.2": 644.9463, "encoder_q-layer.3": 651.4856, "encoder_q-layer.4": 733.9409, "encoder_q-layer.5": 732.8966, "encoder_q-layer.6": 770.6023, "encoder_q-layer.7": 736.905, "encoder_q-layer.8": 700.2106, "encoder_q-layer.9": 600.6661, "epoch": 0.55, "inbatch_neg_score": 0.5036, "inbatch_pos_score": 1.0781, "learning_rate": 9e-06, "loss": 3.7249, "norm_diff": 0.1186, "norm_loss": 0.0, "num_token_doc": 67.0348, "num_token_overlap": 11.6442, "num_token_query": 31.2819, "num_token_union": 65.2285, "num_word_context": 202.3728, "num_word_doc": 49.9992, "num_word_query": 23.2428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1238.6418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5049, "query_norm": 1.3855, "queue_k_norm": 1.5032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2819, "sent_len_1": 67.0348, "sent_len_max_0": 127.3612, "sent_len_max_1": 190.1287, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.717, "doc_norm": 1.5073, "encoder_q-embeddings": 506.7054, "encoder_q-layer.0": 337.1723, "encoder_q-layer.1": 371.5968, "encoder_q-layer.10": 715.2194, "encoder_q-layer.11": 1624.9908, "encoder_q-layer.2": 439.0866, "encoder_q-layer.3": 454.9987, "encoder_q-layer.4": 468.8939, "encoder_q-layer.5": 481.782, "encoder_q-layer.6": 560.0334, "encoder_q-layer.7": 588.3913, "encoder_q-layer.8": 662.4188, "encoder_q-layer.9": 607.4824, "epoch": 0.55, "inbatch_neg_score": 0.5032, "inbatch_pos_score": 1.0977, "learning_rate": 8.944444444444444e-06, "loss": 3.717, "norm_diff": 0.1239, "norm_loss": 0.0, "num_token_doc": 66.6543, "num_token_overlap": 11.6757, "num_token_query": 31.4071, "num_token_union": 65.0876, "num_word_context": 202.2723, "num_word_doc": 49.7694, "num_word_query": 23.3661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1032.5622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5039, "query_norm": 1.3834, "queue_k_norm": 1.5014, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4071, "sent_len_1": 66.6543, "sent_len_max_0": 127.5113, "sent_len_max_1": 187.16, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.73, "doc_norm": 1.5048, "encoder_q-embeddings": 1352.6788, "encoder_q-layer.0": 896.322, "encoder_q-layer.1": 1014.757, "encoder_q-layer.10": 634.5609, "encoder_q-layer.11": 1639.3413, "encoder_q-layer.2": 1079.985, "encoder_q-layer.3": 1110.9684, "encoder_q-layer.4": 1166.495, "encoder_q-layer.5": 1145.2405, "encoder_q-layer.6": 998.7896, "encoder_q-layer.7": 771.3002, "encoder_q-layer.8": 729.6112, "encoder_q-layer.9": 616.9175, "epoch": 0.55, "inbatch_neg_score": 0.5066, "inbatch_pos_score": 1.0801, "learning_rate": 8.88888888888889e-06, "loss": 3.73, "norm_diff": 0.1237, "norm_loss": 0.0, "num_token_doc": 66.7621, "num_token_overlap": 11.6272, "num_token_query": 31.2696, "num_token_union": 65.0777, "num_word_context": 202.4617, "num_word_doc": 49.817, "num_word_query": 23.2239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1624.234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5078, "query_norm": 1.381, "queue_k_norm": 1.5021, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2696, "sent_len_1": 66.7621, "sent_len_max_0": 127.3775, "sent_len_max_1": 189.7325, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7314, "doc_norm": 1.5081, "encoder_q-embeddings": 980.4428, "encoder_q-layer.0": 650.9406, "encoder_q-layer.1": 741.7231, "encoder_q-layer.10": 664.1111, "encoder_q-layer.11": 1605.5698, "encoder_q-layer.2": 843.9596, "encoder_q-layer.3": 896.2985, "encoder_q-layer.4": 955.7357, "encoder_q-layer.5": 982.9658, "encoder_q-layer.6": 1018.4449, "encoder_q-layer.7": 957.6116, "encoder_q-layer.8": 730.5954, "encoder_q-layer.9": 580.3996, "epoch": 0.55, "inbatch_neg_score": 0.509, "inbatch_pos_score": 1.0703, "learning_rate": 8.833333333333334e-06, "loss": 3.7314, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.7762, "num_token_overlap": 11.6153, "num_token_query": 31.1892, "num_token_union": 65.0819, "num_word_context": 202.2378, "num_word_doc": 49.8087, "num_word_query": 23.1289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1409.7709, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5078, "query_norm": 1.3886, "queue_k_norm": 1.5048, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1892, "sent_len_1": 66.7762, "sent_len_max_0": 127.535, "sent_len_max_1": 189.5725, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7174, "doc_norm": 1.5088, "encoder_q-embeddings": 663.1474, "encoder_q-layer.0": 477.1218, "encoder_q-layer.1": 505.2157, "encoder_q-layer.10": 678.0035, "encoder_q-layer.11": 1640.5271, "encoder_q-layer.2": 596.0282, "encoder_q-layer.3": 601.7114, "encoder_q-layer.4": 606.5635, "encoder_q-layer.5": 621.2252, "encoder_q-layer.6": 615.6514, "encoder_q-layer.7": 637.1655, "encoder_q-layer.8": 700.2145, "encoder_q-layer.9": 631.4973, "epoch": 0.55, "inbatch_neg_score": 0.5078, "inbatch_pos_score": 1.0781, "learning_rate": 8.777777777777778e-06, "loss": 3.7174, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.7742, "num_token_overlap": 11.6576, "num_token_query": 31.3514, "num_token_union": 65.0936, "num_word_context": 202.1038, "num_word_doc": 49.8124, "num_word_query": 23.2887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1142.0623, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5073, "query_norm": 1.3909, "queue_k_norm": 1.5036, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3514, "sent_len_1": 66.7742, "sent_len_max_0": 127.4188, "sent_len_max_1": 189.2875, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7363, "doc_norm": 1.5059, "encoder_q-embeddings": 615.5842, "encoder_q-layer.0": 411.9946, "encoder_q-layer.1": 452.9872, "encoder_q-layer.10": 682.2991, "encoder_q-layer.11": 1588.3694, "encoder_q-layer.2": 538.9798, "encoder_q-layer.3": 546.6542, "encoder_q-layer.4": 594.2203, "encoder_q-layer.5": 582.4733, "encoder_q-layer.6": 603.2341, "encoder_q-layer.7": 577.4956, "encoder_q-layer.8": 660.1442, "encoder_q-layer.9": 575.5853, "epoch": 0.55, "inbatch_neg_score": 0.5063, "inbatch_pos_score": 1.0762, "learning_rate": 8.722222222222224e-06, "loss": 3.7363, "norm_diff": 0.1226, "norm_loss": 0.0, "num_token_doc": 66.5414, "num_token_overlap": 11.611, "num_token_query": 31.3279, "num_token_union": 65.0011, "num_word_context": 202.1158, "num_word_doc": 49.6736, "num_word_query": 23.2547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1093.7139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5073, "query_norm": 1.3833, "queue_k_norm": 1.5035, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3279, "sent_len_1": 66.5414, "sent_len_max_0": 127.4213, "sent_len_max_1": 190.325, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7163, "doc_norm": 1.504, "encoder_q-embeddings": 584.2973, "encoder_q-layer.0": 388.045, "encoder_q-layer.1": 420.607, "encoder_q-layer.10": 612.3046, "encoder_q-layer.11": 1585.0237, "encoder_q-layer.2": 492.7554, "encoder_q-layer.3": 496.8647, "encoder_q-layer.4": 541.5948, "encoder_q-layer.5": 604.4974, "encoder_q-layer.6": 582.7631, "encoder_q-layer.7": 591.069, "encoder_q-layer.8": 639.1595, "encoder_q-layer.9": 566.5403, "epoch": 0.55, "inbatch_neg_score": 0.505, "inbatch_pos_score": 1.0762, "learning_rate": 8.666666666666668e-06, "loss": 3.7163, "norm_diff": 0.1275, "norm_loss": 0.0, "num_token_doc": 66.8981, "num_token_overlap": 11.7008, "num_token_query": 31.48, "num_token_union": 65.2579, "num_word_context": 203.0957, "num_word_doc": 49.9139, "num_word_query": 23.3702, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1065.9637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5059, "query_norm": 1.3765, "queue_k_norm": 1.5035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.48, "sent_len_1": 66.8981, "sent_len_max_0": 127.4125, "sent_len_max_1": 189.095, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7146, "doc_norm": 1.5076, "encoder_q-embeddings": 491.2973, "encoder_q-layer.0": 324.0224, "encoder_q-layer.1": 338.4792, "encoder_q-layer.10": 612.3046, "encoder_q-layer.11": 1583.7283, "encoder_q-layer.2": 388.4867, "encoder_q-layer.3": 394.4034, "encoder_q-layer.4": 412.5484, "encoder_q-layer.5": 443.9589, "encoder_q-layer.6": 495.2626, "encoder_q-layer.7": 565.5163, "encoder_q-layer.8": 671.5894, "encoder_q-layer.9": 604.0631, "epoch": 0.55, "inbatch_neg_score": 0.5091, "inbatch_pos_score": 1.0664, "learning_rate": 8.611111111111112e-06, "loss": 3.7146, "norm_diff": 0.1303, "norm_loss": 0.0, "num_token_doc": 66.7397, "num_token_overlap": 11.6475, "num_token_query": 31.2479, "num_token_union": 65.0531, "num_word_context": 202.1584, "num_word_doc": 49.8212, "num_word_query": 23.2051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1008.0772, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.3773, "queue_k_norm": 1.5052, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2479, "sent_len_1": 66.7397, "sent_len_max_0": 127.4887, "sent_len_max_1": 189.2337, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7308, "doc_norm": 1.5104, "encoder_q-embeddings": 585.268, "encoder_q-layer.0": 404.6154, "encoder_q-layer.1": 418.8212, "encoder_q-layer.10": 621.6866, "encoder_q-layer.11": 1619.822, "encoder_q-layer.2": 490.2118, "encoder_q-layer.3": 514.4649, "encoder_q-layer.4": 510.5901, "encoder_q-layer.5": 511.7663, "encoder_q-layer.6": 535.035, "encoder_q-layer.7": 562.5978, "encoder_q-layer.8": 606.1381, "encoder_q-layer.9": 584.4983, "epoch": 0.55, "inbatch_neg_score": 0.509, "inbatch_pos_score": 1.0742, "learning_rate": 8.555555555555556e-06, "loss": 3.7308, "norm_diff": 0.1357, "norm_loss": 0.0, "num_token_doc": 66.9967, "num_token_overlap": 11.6988, "num_token_query": 31.4004, "num_token_union": 65.2479, "num_word_context": 202.603, "num_word_doc": 49.9607, "num_word_query": 23.3459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1055.8031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.3747, "queue_k_norm": 1.5056, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4004, "sent_len_1": 66.9967, "sent_len_max_0": 127.5288, "sent_len_max_1": 192.2463, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7332, "doc_norm": 1.5074, "encoder_q-embeddings": 740.5554, "encoder_q-layer.0": 534.559, "encoder_q-layer.1": 575.6519, "encoder_q-layer.10": 602.1448, "encoder_q-layer.11": 1619.6184, "encoder_q-layer.2": 629.3453, "encoder_q-layer.3": 617.6813, "encoder_q-layer.4": 624.5016, "encoder_q-layer.5": 607.3658, "encoder_q-layer.6": 608.7748, "encoder_q-layer.7": 626.0972, "encoder_q-layer.8": 635.7294, "encoder_q-layer.9": 583.2781, "epoch": 0.55, "inbatch_neg_score": 0.5108, "inbatch_pos_score": 1.0713, "learning_rate": 8.500000000000002e-06, "loss": 3.7332, "norm_diff": 0.1356, "norm_loss": 0.0, "num_token_doc": 66.6579, "num_token_overlap": 11.7112, "num_token_query": 31.4006, "num_token_union": 65.0222, "num_word_context": 201.7771, "num_word_doc": 49.7677, "num_word_query": 23.3229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1168.5367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5103, "query_norm": 1.3718, "queue_k_norm": 1.5047, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4006, "sent_len_1": 66.6579, "sent_len_max_0": 127.5062, "sent_len_max_1": 189.8088, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.7265, "doc_norm": 1.5117, "encoder_q-embeddings": 613.2863, "encoder_q-layer.0": 438.1245, "encoder_q-layer.1": 450.328, "encoder_q-layer.10": 680.3628, "encoder_q-layer.11": 1675.2194, "encoder_q-layer.2": 510.5264, "encoder_q-layer.3": 526.6884, "encoder_q-layer.4": 565.056, "encoder_q-layer.5": 563.7913, "encoder_q-layer.6": 579.7612, "encoder_q-layer.7": 606.6622, "encoder_q-layer.8": 668.2055, "encoder_q-layer.9": 629.4462, "epoch": 0.55, "inbatch_neg_score": 0.5084, "inbatch_pos_score": 1.0713, "learning_rate": 8.444444444444446e-06, "loss": 3.7265, "norm_diff": 0.1313, "norm_loss": 0.0, "num_token_doc": 66.8135, "num_token_overlap": 11.705, "num_token_query": 31.4618, "num_token_union": 65.156, "num_word_context": 202.3064, "num_word_doc": 49.845, "num_word_query": 23.3634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1125.1075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5078, "query_norm": 1.3805, "queue_k_norm": 1.5054, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4618, "sent_len_1": 66.8135, "sent_len_max_0": 127.5113, "sent_len_max_1": 189.9712, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.7157, "doc_norm": 1.4916, "encoder_q-embeddings": 596.4965, "encoder_q-layer.0": 411.0303, "encoder_q-layer.1": 426.5299, "encoder_q-layer.10": 642.7432, "encoder_q-layer.11": 1607.3534, "encoder_q-layer.2": 493.9283, "encoder_q-layer.3": 517.7462, "encoder_q-layer.4": 516.1277, "encoder_q-layer.5": 523.26, "encoder_q-layer.6": 518.031, "encoder_q-layer.7": 539.962, "encoder_q-layer.8": 621.71, "encoder_q-layer.9": 569.045, "epoch": 0.55, "inbatch_neg_score": 0.5082, "inbatch_pos_score": 1.0547, "learning_rate": 8.38888888888889e-06, "loss": 3.7157, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.8537, "num_token_overlap": 11.7323, "num_token_query": 31.6226, "num_token_union": 65.275, "num_word_context": 202.5886, "num_word_doc": 49.8378, "num_word_query": 23.4838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1066.657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5088, "query_norm": 1.3709, "queue_k_norm": 1.5067, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.6226, "sent_len_1": 66.8537, "sent_len_max_0": 127.6338, "sent_len_max_1": 193.8338, "stdk": 0.0474, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.7211, "doc_norm": 1.5109, "encoder_q-embeddings": 869.5407, "encoder_q-layer.0": 638.2106, "encoder_q-layer.1": 727.5753, "encoder_q-layer.10": 633.4936, "encoder_q-layer.11": 1585.9805, "encoder_q-layer.2": 832.1356, "encoder_q-layer.3": 817.9604, "encoder_q-layer.4": 806.0886, "encoder_q-layer.5": 737.9385, "encoder_q-layer.6": 807.5244, "encoder_q-layer.7": 824.5336, "encoder_q-layer.8": 768.4784, "encoder_q-layer.9": 611.1151, "epoch": 0.55, "inbatch_neg_score": 0.5106, "inbatch_pos_score": 1.0596, "learning_rate": 8.333333333333334e-06, "loss": 3.7211, "norm_diff": 0.1381, "norm_loss": 0.0, "num_token_doc": 66.6619, "num_token_overlap": 11.67, "num_token_query": 31.3741, "num_token_union": 65.0647, "num_word_context": 202.339, "num_word_doc": 49.807, "num_word_query": 23.3099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1308.0102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5088, "query_norm": 1.3729, "queue_k_norm": 1.5076, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3741, "sent_len_1": 66.6619, "sent_len_max_0": 127.4325, "sent_len_max_1": 189.0275, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.7252, "doc_norm": 1.5045, "encoder_q-embeddings": 3261.6826, "encoder_q-layer.0": 2364.4456, "encoder_q-layer.1": 2638.8062, "encoder_q-layer.10": 623.2271, "encoder_q-layer.11": 1526.3677, "encoder_q-layer.2": 2875.9133, "encoder_q-layer.3": 3080.8545, "encoder_q-layer.4": 2966.5586, "encoder_q-layer.5": 2712.8135, "encoder_q-layer.6": 2415.3582, "encoder_q-layer.7": 2092.8057, "encoder_q-layer.8": 1480.1001, "encoder_q-layer.9": 796.1005, "epoch": 0.55, "inbatch_neg_score": 0.5086, "inbatch_pos_score": 1.0674, "learning_rate": 8.27777777777778e-06, "loss": 3.7252, "norm_diff": 0.1235, "norm_loss": 0.0, "num_token_doc": 66.8362, "num_token_overlap": 11.6573, "num_token_query": 31.4338, "num_token_union": 65.1962, "num_word_context": 202.7577, "num_word_doc": 49.9255, "num_word_query": 23.3591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3707.7293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5078, "query_norm": 1.381, "queue_k_norm": 1.5056, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4338, "sent_len_1": 66.8362, "sent_len_max_0": 127.515, "sent_len_max_1": 186.55, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.717, "doc_norm": 1.5088, "encoder_q-embeddings": 1374.0366, "encoder_q-layer.0": 957.6708, "encoder_q-layer.1": 1048.921, "encoder_q-layer.10": 1213.4106, "encoder_q-layer.11": 3073.1208, "encoder_q-layer.2": 1224.4344, "encoder_q-layer.3": 1288.1868, "encoder_q-layer.4": 1382.613, "encoder_q-layer.5": 1369.4293, "encoder_q-layer.6": 1414.5553, "encoder_q-layer.7": 1425.5778, "encoder_q-layer.8": 1433.23, "encoder_q-layer.9": 1180.0889, "epoch": 0.55, "inbatch_neg_score": 0.5071, "inbatch_pos_score": 1.0566, "learning_rate": 8.222222222222223e-06, "loss": 3.717, "norm_diff": 0.1371, "norm_loss": 0.0, "num_token_doc": 66.6043, "num_token_overlap": 11.6623, "num_token_query": 31.3443, "num_token_union": 65.0111, "num_word_context": 201.8974, "num_word_doc": 49.689, "num_word_query": 23.2701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2307.6823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5083, "query_norm": 1.3716, "queue_k_norm": 1.5048, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3443, "sent_len_1": 66.6043, "sent_len_max_0": 127.63, "sent_len_max_1": 188.625, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7271, "doc_norm": 1.5057, "encoder_q-embeddings": 1550.4138, "encoder_q-layer.0": 1085.7258, "encoder_q-layer.1": 1229.8739, "encoder_q-layer.10": 1272.2487, "encoder_q-layer.11": 3384.3838, "encoder_q-layer.2": 1419.5846, "encoder_q-layer.3": 1473.4984, "encoder_q-layer.4": 1594.5015, "encoder_q-layer.5": 1625.472, "encoder_q-layer.6": 1680.667, "encoder_q-layer.7": 1397.4193, "encoder_q-layer.8": 1400.6736, "encoder_q-layer.9": 1212.3861, "epoch": 0.56, "inbatch_neg_score": 0.51, "inbatch_pos_score": 1.0684, "learning_rate": 8.166666666666668e-06, "loss": 3.7271, "norm_diff": 0.1288, "norm_loss": 0.0, "num_token_doc": 66.857, "num_token_overlap": 11.6708, "num_token_query": 31.2645, "num_token_union": 65.1162, "num_word_context": 202.32, "num_word_doc": 49.9455, "num_word_query": 23.2141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2565.8078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5098, "query_norm": 1.3769, "queue_k_norm": 1.5059, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2645, "sent_len_1": 66.857, "sent_len_max_0": 127.4412, "sent_len_max_1": 188.2512, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7207, "doc_norm": 1.5164, "encoder_q-embeddings": 1135.4775, "encoder_q-layer.0": 776.5175, "encoder_q-layer.1": 821.8583, "encoder_q-layer.10": 1422.0569, "encoder_q-layer.11": 3231.877, "encoder_q-layer.2": 900.3508, "encoder_q-layer.3": 933.5707, "encoder_q-layer.4": 947.3932, "encoder_q-layer.5": 972.554, "encoder_q-layer.6": 1060.4318, "encoder_q-layer.7": 1235.1774, "encoder_q-layer.8": 1409.3453, "encoder_q-layer.9": 1227.2394, "epoch": 0.56, "inbatch_neg_score": 0.5078, "inbatch_pos_score": 1.0967, "learning_rate": 8.111111111111112e-06, "loss": 3.7207, "norm_diff": 0.124, "norm_loss": 0.0, "num_token_doc": 66.9319, "num_token_overlap": 11.7055, "num_token_query": 31.4225, "num_token_union": 65.1983, "num_word_context": 202.6294, "num_word_doc": 50.0037, "num_word_query": 23.3348, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.4825, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5078, "query_norm": 1.3924, "queue_k_norm": 1.5074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4225, "sent_len_1": 66.9319, "sent_len_max_0": 127.2613, "sent_len_max_1": 187.8063, "stdk": 0.0484, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7309, "doc_norm": 1.5092, "encoder_q-embeddings": 1155.8556, "encoder_q-layer.0": 781.3004, "encoder_q-layer.1": 846.2878, "encoder_q-layer.10": 1182.4968, "encoder_q-layer.11": 3179.27, "encoder_q-layer.2": 917.8346, "encoder_q-layer.3": 931.2779, "encoder_q-layer.4": 935.5089, "encoder_q-layer.5": 984.8792, "encoder_q-layer.6": 1006.232, "encoder_q-layer.7": 1124.4158, "encoder_q-layer.8": 1315.9436, "encoder_q-layer.9": 1132.4418, "epoch": 0.56, "inbatch_neg_score": 0.5057, "inbatch_pos_score": 1.0791, "learning_rate": 8.055555555555557e-06, "loss": 3.7309, "norm_diff": 0.1352, "norm_loss": 0.0, "num_token_doc": 66.8013, "num_token_overlap": 11.6216, "num_token_query": 31.2415, "num_token_union": 65.1172, "num_word_context": 202.376, "num_word_doc": 49.8398, "num_word_query": 23.2079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2080.3434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5068, "query_norm": 1.3739, "queue_k_norm": 1.507, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2415, "sent_len_1": 66.8013, "sent_len_max_0": 127.3937, "sent_len_max_1": 188.0062, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.713, "doc_norm": 1.5091, "encoder_q-embeddings": 1150.9827, "encoder_q-layer.0": 765.2045, "encoder_q-layer.1": 804.2938, "encoder_q-layer.10": 1230.752, "encoder_q-layer.11": 3170.5115, "encoder_q-layer.2": 880.4922, "encoder_q-layer.3": 898.3081, "encoder_q-layer.4": 890.7004, "encoder_q-layer.5": 962.9736, "encoder_q-layer.6": 1043.6766, "encoder_q-layer.7": 1119.5055, "encoder_q-layer.8": 1297.9728, "encoder_q-layer.9": 1180.3527, "epoch": 0.56, "inbatch_neg_score": 0.5094, "inbatch_pos_score": 1.0908, "learning_rate": 8.000000000000001e-06, "loss": 3.713, "norm_diff": 0.1276, "norm_loss": 0.0, "num_token_doc": 66.8793, "num_token_overlap": 11.6753, "num_token_query": 31.4189, "num_token_union": 65.2475, "num_word_context": 202.4278, "num_word_doc": 49.8969, "num_word_query": 23.3315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2082.5014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.3816, "queue_k_norm": 1.5071, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4189, "sent_len_1": 66.8793, "sent_len_max_0": 127.4188, "sent_len_max_1": 189.785, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7103, "doc_norm": 1.511, "encoder_q-embeddings": 1827.8219, "encoder_q-layer.0": 1215.1487, "encoder_q-layer.1": 1337.749, "encoder_q-layer.10": 1255.0276, "encoder_q-layer.11": 3195.4763, "encoder_q-layer.2": 1594.5616, "encoder_q-layer.3": 1592.2823, "encoder_q-layer.4": 1582.2623, "encoder_q-layer.5": 1751.12, "encoder_q-layer.6": 1560.5684, "encoder_q-layer.7": 1356.9924, "encoder_q-layer.8": 1341.1594, "encoder_q-layer.9": 1182.3805, "epoch": 0.56, "inbatch_neg_score": 0.5094, "inbatch_pos_score": 1.0664, "learning_rate": 7.944444444444445e-06, "loss": 3.7103, "norm_diff": 0.1388, "norm_loss": 0.0, "num_token_doc": 66.6513, "num_token_overlap": 11.6767, "num_token_query": 31.3161, "num_token_union": 65.0717, "num_word_context": 202.3374, "num_word_doc": 49.7702, "num_word_query": 23.2836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2597.996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.3722, "queue_k_norm": 1.5078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3161, "sent_len_1": 66.6513, "sent_len_max_0": 127.3275, "sent_len_max_1": 189.0475, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7212, "doc_norm": 1.5138, "encoder_q-embeddings": 1135.5419, "encoder_q-layer.0": 782.2671, "encoder_q-layer.1": 894.5037, "encoder_q-layer.10": 1285.9574, "encoder_q-layer.11": 3103.7939, "encoder_q-layer.2": 1027.8265, "encoder_q-layer.3": 1078.0791, "encoder_q-layer.4": 1050.0491, "encoder_q-layer.5": 1142.7042, "encoder_q-layer.6": 1230.6493, "encoder_q-layer.7": 1237.137, "encoder_q-layer.8": 1326.02, "encoder_q-layer.9": 1179.0168, "epoch": 0.56, "inbatch_neg_score": 0.5103, "inbatch_pos_score": 1.082, "learning_rate": 7.88888888888889e-06, "loss": 3.7212, "norm_diff": 0.1369, "norm_loss": 0.0, "num_token_doc": 66.7698, "num_token_overlap": 11.6877, "num_token_query": 31.3542, "num_token_union": 65.0839, "num_word_context": 202.0398, "num_word_doc": 49.8305, "num_word_query": 23.3037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2122.8967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5103, "query_norm": 1.3768, "queue_k_norm": 1.5079, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3542, "sent_len_1": 66.7698, "sent_len_max_0": 127.545, "sent_len_max_1": 189.0012, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7184, "doc_norm": 1.5088, "encoder_q-embeddings": 1176.9626, "encoder_q-layer.0": 772.3656, "encoder_q-layer.1": 840.7836, "encoder_q-layer.10": 1274.5043, "encoder_q-layer.11": 3171.9436, "encoder_q-layer.2": 995.1019, "encoder_q-layer.3": 1129.6902, "encoder_q-layer.4": 1186.468, "encoder_q-layer.5": 1221.1122, "encoder_q-layer.6": 1352.2021, "encoder_q-layer.7": 1266.0475, "encoder_q-layer.8": 1361.0383, "encoder_q-layer.9": 1203.3438, "epoch": 0.56, "inbatch_neg_score": 0.5111, "inbatch_pos_score": 1.0791, "learning_rate": 7.833333333333333e-06, "loss": 3.7184, "norm_diff": 0.1273, "norm_loss": 0.0, "num_token_doc": 66.8003, "num_token_overlap": 11.7123, "num_token_query": 31.3784, "num_token_union": 65.0945, "num_word_context": 202.216, "num_word_doc": 49.8393, "num_word_query": 23.3106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2192.0791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5098, "query_norm": 1.3815, "queue_k_norm": 1.5087, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3784, "sent_len_1": 66.8003, "sent_len_max_0": 127.5962, "sent_len_max_1": 189.9725, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.723, "doc_norm": 1.5052, "encoder_q-embeddings": 1512.7728, "encoder_q-layer.0": 1023.6602, "encoder_q-layer.1": 1161.0994, "encoder_q-layer.10": 1271.4253, "encoder_q-layer.11": 3206.8518, "encoder_q-layer.2": 1327.4703, "encoder_q-layer.3": 1281.9415, "encoder_q-layer.4": 1389.4812, "encoder_q-layer.5": 1369.1228, "encoder_q-layer.6": 1405.3088, "encoder_q-layer.7": 1399.5714, "encoder_q-layer.8": 1465.0962, "encoder_q-layer.9": 1268.1628, "epoch": 0.56, "inbatch_neg_score": 0.5086, "inbatch_pos_score": 1.0742, "learning_rate": 7.777777777777777e-06, "loss": 3.723, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 66.7345, "num_token_overlap": 11.6477, "num_token_query": 31.3158, "num_token_union": 65.0987, "num_word_context": 202.5471, "num_word_doc": 49.846, "num_word_query": 23.2508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2392.7039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.3801, "queue_k_norm": 1.5079, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3158, "sent_len_1": 66.7345, "sent_len_max_0": 127.52, "sent_len_max_1": 189.1275, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7322, "doc_norm": 1.5089, "encoder_q-embeddings": 1137.4755, "encoder_q-layer.0": 752.6396, "encoder_q-layer.1": 775.9616, "encoder_q-layer.10": 1278.478, "encoder_q-layer.11": 3282.5461, "encoder_q-layer.2": 883.5048, "encoder_q-layer.3": 891.4236, "encoder_q-layer.4": 937.1513, "encoder_q-layer.5": 915.4402, "encoder_q-layer.6": 1006.6111, "encoder_q-layer.7": 1152.5535, "encoder_q-layer.8": 1297.1494, "encoder_q-layer.9": 1203.8091, "epoch": 0.56, "inbatch_neg_score": 0.5097, "inbatch_pos_score": 1.0859, "learning_rate": 7.722222222222223e-06, "loss": 3.7322, "norm_diff": 0.1307, "norm_loss": 0.0, "num_token_doc": 66.5845, "num_token_overlap": 11.6628, "num_token_query": 31.4348, "num_token_union": 65.0478, "num_word_context": 202.279, "num_word_doc": 49.6899, "num_word_query": 23.3516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2130.9622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5103, "query_norm": 1.3782, "queue_k_norm": 1.5079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4348, "sent_len_1": 66.5845, "sent_len_max_0": 127.5487, "sent_len_max_1": 188.795, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7127, "doc_norm": 1.5157, "encoder_q-embeddings": 1828.5776, "encoder_q-layer.0": 1202.7996, "encoder_q-layer.1": 1306.5825, "encoder_q-layer.10": 1305.4192, "encoder_q-layer.11": 3395.7544, "encoder_q-layer.2": 1529.6051, "encoder_q-layer.3": 1583.4797, "encoder_q-layer.4": 1674.1935, "encoder_q-layer.5": 1557.6827, "encoder_q-layer.6": 1460.177, "encoder_q-layer.7": 1435.7618, "encoder_q-layer.8": 1491.6591, "encoder_q-layer.9": 1256.7975, "epoch": 0.56, "inbatch_neg_score": 0.5072, "inbatch_pos_score": 1.082, "learning_rate": 7.666666666666667e-06, "loss": 3.7127, "norm_diff": 0.1257, "norm_loss": 0.0, "num_token_doc": 66.9842, "num_token_overlap": 11.7048, "num_token_query": 31.4463, "num_token_union": 65.3094, "num_word_context": 202.6037, "num_word_doc": 49.9775, "num_word_query": 23.3625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2657.0227, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5083, "query_norm": 1.39, "queue_k_norm": 1.5109, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4463, "sent_len_1": 66.9842, "sent_len_max_0": 127.4475, "sent_len_max_1": 190.6925, "stdk": 0.0484, "stdq": 0.0428, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.72, "doc_norm": 1.5117, "encoder_q-embeddings": 1563.5924, "encoder_q-layer.0": 1072.4091, "encoder_q-layer.1": 1156.0527, "encoder_q-layer.10": 1335.9072, "encoder_q-layer.11": 3354.6873, "encoder_q-layer.2": 1363.2449, "encoder_q-layer.3": 1327.8438, "encoder_q-layer.4": 1369.4832, "encoder_q-layer.5": 1396.5342, "encoder_q-layer.6": 1499.282, "encoder_q-layer.7": 1487.175, "encoder_q-layer.8": 1595.6221, "encoder_q-layer.9": 1261.382, "epoch": 0.56, "inbatch_neg_score": 0.51, "inbatch_pos_score": 1.0752, "learning_rate": 7.611111111111112e-06, "loss": 3.72, "norm_diff": 0.1371, "norm_loss": 0.0, "num_token_doc": 66.6425, "num_token_overlap": 11.6448, "num_token_query": 31.3285, "num_token_union": 65.0196, "num_word_context": 201.7313, "num_word_doc": 49.7084, "num_word_query": 23.2549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2464.7504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5103, "query_norm": 1.3747, "queue_k_norm": 1.5072, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3285, "sent_len_1": 66.6425, "sent_len_max_0": 127.4825, "sent_len_max_1": 188.74, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.6998, "doc_norm": 1.505, "encoder_q-embeddings": 2465.5876, "encoder_q-layer.0": 1739.0868, "encoder_q-layer.1": 2136.8582, "encoder_q-layer.10": 1517.5354, "encoder_q-layer.11": 3483.4338, "encoder_q-layer.2": 2930.8372, "encoder_q-layer.3": 3055.2981, "encoder_q-layer.4": 2569.3809, "encoder_q-layer.5": 1925.3156, "encoder_q-layer.6": 1948.1486, "encoder_q-layer.7": 1636.3307, "encoder_q-layer.8": 1611.9031, "encoder_q-layer.9": 1294.5464, "epoch": 0.56, "inbatch_neg_score": 0.5144, "inbatch_pos_score": 1.0605, "learning_rate": 7.555555555555556e-06, "loss": 3.6998, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.8531, "num_token_overlap": 11.7088, "num_token_query": 31.362, "num_token_union": 65.0771, "num_word_context": 202.2403, "num_word_doc": 49.8749, "num_word_query": 23.2871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3499.0086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5132, "query_norm": 1.3849, "queue_k_norm": 1.5087, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.362, "sent_len_1": 66.8531, "sent_len_max_0": 127.4737, "sent_len_max_1": 188.9062, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7033, "doc_norm": 1.5177, "encoder_q-embeddings": 1119.7139, "encoder_q-layer.0": 767.6314, "encoder_q-layer.1": 789.2274, "encoder_q-layer.10": 1296.1226, "encoder_q-layer.11": 3213.6323, "encoder_q-layer.2": 876.5916, "encoder_q-layer.3": 910.3643, "encoder_q-layer.4": 962.5778, "encoder_q-layer.5": 1004.3456, "encoder_q-layer.6": 1051.3901, "encoder_q-layer.7": 1221.8516, "encoder_q-layer.8": 1350.4078, "encoder_q-layer.9": 1228.7771, "epoch": 0.56, "inbatch_neg_score": 0.5126, "inbatch_pos_score": 1.0859, "learning_rate": 7.5e-06, "loss": 3.7033, "norm_diff": 0.1322, "norm_loss": 0.0, "num_token_doc": 66.8669, "num_token_overlap": 11.7378, "num_token_query": 31.5044, "num_token_union": 65.1937, "num_word_context": 202.4073, "num_word_doc": 49.8905, "num_word_query": 23.4108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2114.946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5137, "query_norm": 1.3855, "queue_k_norm": 1.509, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5044, "sent_len_1": 66.8669, "sent_len_max_0": 127.3838, "sent_len_max_1": 191.5475, "stdk": 0.0484, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.713, "doc_norm": 1.5106, "encoder_q-embeddings": 1213.2782, "encoder_q-layer.0": 859.7612, "encoder_q-layer.1": 851.519, "encoder_q-layer.10": 1202.1569, "encoder_q-layer.11": 3071.0122, "encoder_q-layer.2": 975.9269, "encoder_q-layer.3": 954.9713, "encoder_q-layer.4": 962.3495, "encoder_q-layer.5": 1021.6281, "encoder_q-layer.6": 1177.8981, "encoder_q-layer.7": 1298.5525, "encoder_q-layer.8": 1423.6467, "encoder_q-layer.9": 1189.6329, "epoch": 0.56, "inbatch_neg_score": 0.5129, "inbatch_pos_score": 1.0996, "learning_rate": 7.444444444444444e-06, "loss": 3.713, "norm_diff": 0.1244, "norm_loss": 0.0, "num_token_doc": 66.7985, "num_token_overlap": 11.6584, "num_token_query": 31.281, "num_token_union": 65.107, "num_word_context": 202.1225, "num_word_doc": 49.8599, "num_word_query": 23.2289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2127.9991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5132, "query_norm": 1.3862, "queue_k_norm": 1.5085, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.281, "sent_len_1": 66.7985, "sent_len_max_0": 127.6637, "sent_len_max_1": 188.0975, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7278, "doc_norm": 1.5101, "encoder_q-embeddings": 1267.4021, "encoder_q-layer.0": 904.7761, "encoder_q-layer.1": 946.3679, "encoder_q-layer.10": 1291.3177, "encoder_q-layer.11": 3303.6565, "encoder_q-layer.2": 1095.3866, "encoder_q-layer.3": 1117.4366, "encoder_q-layer.4": 1116.146, "encoder_q-layer.5": 1125.1461, "encoder_q-layer.6": 1046.291, "encoder_q-layer.7": 1132.423, "encoder_q-layer.8": 1318.9471, "encoder_q-layer.9": 1209.0212, "epoch": 0.56, "inbatch_neg_score": 0.5162, "inbatch_pos_score": 1.0664, "learning_rate": 7.38888888888889e-06, "loss": 3.7278, "norm_diff": 0.1323, "norm_loss": 0.0, "num_token_doc": 66.5652, "num_token_overlap": 11.5879, "num_token_query": 31.2332, "num_token_union": 64.9433, "num_word_context": 202.2104, "num_word_doc": 49.6759, "num_word_query": 23.2041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2239.7512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5156, "query_norm": 1.3778, "queue_k_norm": 1.5086, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2332, "sent_len_1": 66.5652, "sent_len_max_0": 127.53, "sent_len_max_1": 188.9212, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7256, "doc_norm": 1.515, "encoder_q-embeddings": 617.5566, "encoder_q-layer.0": 411.7298, "encoder_q-layer.1": 453.6386, "encoder_q-layer.10": 604.5527, "encoder_q-layer.11": 1546.0669, "encoder_q-layer.2": 525.7224, "encoder_q-layer.3": 514.3262, "encoder_q-layer.4": 544.159, "encoder_q-layer.5": 529.0566, "encoder_q-layer.6": 544.3885, "encoder_q-layer.7": 556.5248, "encoder_q-layer.8": 628.5391, "encoder_q-layer.9": 575.6856, "epoch": 0.56, "inbatch_neg_score": 0.5178, "inbatch_pos_score": 1.0898, "learning_rate": 7.333333333333334e-06, "loss": 3.7256, "norm_diff": 0.1287, "norm_loss": 0.0, "num_token_doc": 66.8058, "num_token_overlap": 11.6148, "num_token_query": 31.3331, "num_token_union": 65.1541, "num_word_context": 202.3407, "num_word_doc": 49.8196, "num_word_query": 23.2614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1067.7957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5176, "query_norm": 1.3864, "queue_k_norm": 1.5095, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3331, "sent_len_1": 66.8058, "sent_len_max_0": 127.395, "sent_len_max_1": 189.5687, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7077, "doc_norm": 1.5113, "encoder_q-embeddings": 587.6013, "encoder_q-layer.0": 393.381, "encoder_q-layer.1": 399.496, "encoder_q-layer.10": 672.5462, "encoder_q-layer.11": 1756.9336, "encoder_q-layer.2": 451.801, "encoder_q-layer.3": 458.6282, "encoder_q-layer.4": 459.1605, "encoder_q-layer.5": 466.5793, "encoder_q-layer.6": 527.1057, "encoder_q-layer.7": 576.2417, "encoder_q-layer.8": 676.5969, "encoder_q-layer.9": 641.0182, "epoch": 0.57, "inbatch_neg_score": 0.5163, "inbatch_pos_score": 1.0957, "learning_rate": 7.277777777777778e-06, "loss": 3.7077, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.6637, "num_token_overlap": 11.6644, "num_token_query": 31.4504, "num_token_union": 65.1394, "num_word_context": 202.0944, "num_word_doc": 49.737, "num_word_query": 23.3594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1138.6442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5171, "query_norm": 1.3912, "queue_k_norm": 1.5091, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4504, "sent_len_1": 66.6637, "sent_len_max_0": 127.28, "sent_len_max_1": 188.1887, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.7405, "doc_norm": 1.5083, "encoder_q-embeddings": 489.7719, "encoder_q-layer.0": 325.2927, "encoder_q-layer.1": 348.1613, "encoder_q-layer.10": 603.4933, "encoder_q-layer.11": 1588.7015, "encoder_q-layer.2": 405.6191, "encoder_q-layer.3": 412.6115, "encoder_q-layer.4": 454.2379, "encoder_q-layer.5": 439.2519, "encoder_q-layer.6": 484.7456, "encoder_q-layer.7": 527.7079, "encoder_q-layer.8": 606.1637, "encoder_q-layer.9": 580.9486, "epoch": 0.57, "inbatch_neg_score": 0.5183, "inbatch_pos_score": 1.0742, "learning_rate": 7.222222222222222e-06, "loss": 3.7405, "norm_diff": 0.1338, "norm_loss": 0.0, "num_token_doc": 66.7952, "num_token_overlap": 11.5855, "num_token_query": 31.2966, "num_token_union": 65.1334, "num_word_context": 202.5369, "num_word_doc": 49.8039, "num_word_query": 23.2114, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1010.1813, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5176, "query_norm": 1.3745, "queue_k_norm": 1.5104, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2966, "sent_len_1": 66.7952, "sent_len_max_0": 127.5675, "sent_len_max_1": 191.4238, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7273, "doc_norm": 1.5097, "encoder_q-embeddings": 549.7214, "encoder_q-layer.0": 380.7178, "encoder_q-layer.1": 390.8864, "encoder_q-layer.10": 643.1498, "encoder_q-layer.11": 1563.7197, "encoder_q-layer.2": 436.1132, "encoder_q-layer.3": 442.891, "encoder_q-layer.4": 463.0401, "encoder_q-layer.5": 463.8558, "encoder_q-layer.6": 493.6563, "encoder_q-layer.7": 545.6015, "encoder_q-layer.8": 611.2673, "encoder_q-layer.9": 593.9852, "epoch": 0.57, "inbatch_neg_score": 0.5195, "inbatch_pos_score": 1.0957, "learning_rate": 7.166666666666667e-06, "loss": 3.7273, "norm_diff": 0.121, "norm_loss": 0.0, "num_token_doc": 66.7771, "num_token_overlap": 11.6777, "num_token_query": 31.3732, "num_token_union": 65.1525, "num_word_context": 202.2692, "num_word_doc": 49.8032, "num_word_query": 23.3, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1013.7234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.519, "query_norm": 1.3887, "queue_k_norm": 1.5088, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3732, "sent_len_1": 66.7771, "sent_len_max_0": 127.3088, "sent_len_max_1": 190.3775, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.7354, "doc_norm": 1.5074, "encoder_q-embeddings": 692.9579, "encoder_q-layer.0": 461.2401, "encoder_q-layer.1": 497.5034, "encoder_q-layer.10": 678.3924, "encoder_q-layer.11": 1683.1123, "encoder_q-layer.2": 565.5682, "encoder_q-layer.3": 583.2312, "encoder_q-layer.4": 623.9515, "encoder_q-layer.5": 660.3142, "encoder_q-layer.6": 650.9498, "encoder_q-layer.7": 635.3273, "encoder_q-layer.8": 687.0211, "encoder_q-layer.9": 592.636, "epoch": 0.57, "inbatch_neg_score": 0.5194, "inbatch_pos_score": 1.0615, "learning_rate": 7.111111111111112e-06, "loss": 3.7354, "norm_diff": 0.1305, "norm_loss": 0.0, "num_token_doc": 66.9523, "num_token_overlap": 11.6851, "num_token_query": 31.3978, "num_token_union": 65.252, "num_word_context": 202.3454, "num_word_doc": 49.9952, "num_word_query": 23.3251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1168.0317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.52, "query_norm": 1.3769, "queue_k_norm": 1.5084, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3978, "sent_len_1": 66.9523, "sent_len_max_0": 127.47, "sent_len_max_1": 188.6525, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7202, "doc_norm": 1.5146, "encoder_q-embeddings": 544.7031, "encoder_q-layer.0": 358.8245, "encoder_q-layer.1": 372.7153, "encoder_q-layer.10": 660.2136, "encoder_q-layer.11": 1631.1755, "encoder_q-layer.2": 410.6082, "encoder_q-layer.3": 441.8202, "encoder_q-layer.4": 455.5579, "encoder_q-layer.5": 450.9041, "encoder_q-layer.6": 512.7641, "encoder_q-layer.7": 554.6066, "encoder_q-layer.8": 629.9001, "encoder_q-layer.9": 596.4326, "epoch": 0.57, "inbatch_neg_score": 0.5198, "inbatch_pos_score": 1.0879, "learning_rate": 7.055555555555556e-06, "loss": 3.7202, "norm_diff": 0.1303, "norm_loss": 0.0, "num_token_doc": 66.908, "num_token_overlap": 11.6167, "num_token_query": 31.1551, "num_token_union": 65.1227, "num_word_context": 202.333, "num_word_doc": 49.8952, "num_word_query": 23.1379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.7824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5205, "query_norm": 1.3843, "queue_k_norm": 1.5123, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.1551, "sent_len_1": 66.908, "sent_len_max_0": 127.405, "sent_len_max_1": 189.015, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7386, "doc_norm": 1.5127, "encoder_q-embeddings": 725.0955, "encoder_q-layer.0": 480.0825, "encoder_q-layer.1": 535.8403, "encoder_q-layer.10": 672.3478, "encoder_q-layer.11": 1732.4694, "encoder_q-layer.2": 650.0162, "encoder_q-layer.3": 688.9474, "encoder_q-layer.4": 710.1315, "encoder_q-layer.5": 712.4971, "encoder_q-layer.6": 721.9781, "encoder_q-layer.7": 748.7778, "encoder_q-layer.8": 804.1832, "encoder_q-layer.9": 665.7433, "epoch": 0.57, "inbatch_neg_score": 0.5232, "inbatch_pos_score": 1.0674, "learning_rate": 7.000000000000001e-06, "loss": 3.7386, "norm_diff": 0.1297, "norm_loss": 0.0, "num_token_doc": 66.5453, "num_token_overlap": 11.6575, "num_token_query": 31.3487, "num_token_union": 64.9929, "num_word_context": 202.1262, "num_word_doc": 49.6865, "num_word_query": 23.2899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1256.3204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.522, "query_norm": 1.383, "queue_k_norm": 1.5103, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3487, "sent_len_1": 66.5453, "sent_len_max_0": 127.3462, "sent_len_max_1": 186.985, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.7169, "doc_norm": 1.5123, "encoder_q-embeddings": 586.3774, "encoder_q-layer.0": 404.1955, "encoder_q-layer.1": 450.1067, "encoder_q-layer.10": 607.4354, "encoder_q-layer.11": 1564.03, "encoder_q-layer.2": 509.679, "encoder_q-layer.3": 544.3957, "encoder_q-layer.4": 550.9033, "encoder_q-layer.5": 571.2442, "encoder_q-layer.6": 581.529, "encoder_q-layer.7": 581.6078, "encoder_q-layer.8": 602.6711, "encoder_q-layer.9": 548.0718, "epoch": 0.57, "inbatch_neg_score": 0.5192, "inbatch_pos_score": 1.1055, "learning_rate": 6.944444444444445e-06, "loss": 3.7169, "norm_diff": 0.1327, "norm_loss": 0.0, "num_token_doc": 66.7878, "num_token_overlap": 11.6854, "num_token_query": 31.4939, "num_token_union": 65.1954, "num_word_context": 202.6933, "num_word_doc": 49.8729, "num_word_query": 23.3998, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1060.2546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5195, "query_norm": 1.3795, "queue_k_norm": 1.5103, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4939, "sent_len_1": 66.7878, "sent_len_max_0": 127.3, "sent_len_max_1": 190.21, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7246, "doc_norm": 1.5174, "encoder_q-embeddings": 579.8681, "encoder_q-layer.0": 392.3524, "encoder_q-layer.1": 437.5745, "encoder_q-layer.10": 669.657, "encoder_q-layer.11": 1620.8298, "encoder_q-layer.2": 492.0748, "encoder_q-layer.3": 509.0746, "encoder_q-layer.4": 536.379, "encoder_q-layer.5": 580.9606, "encoder_q-layer.6": 572.6614, "encoder_q-layer.7": 605.3893, "encoder_q-layer.8": 644.7693, "encoder_q-layer.9": 579.274, "epoch": 0.57, "inbatch_neg_score": 0.522, "inbatch_pos_score": 1.0908, "learning_rate": 6.888888888888889e-06, "loss": 3.7246, "norm_diff": 0.1366, "norm_loss": 0.0, "num_token_doc": 66.7049, "num_token_overlap": 11.6744, "num_token_query": 31.3311, "num_token_union": 65.0754, "num_word_context": 202.1503, "num_word_doc": 49.7588, "num_word_query": 23.25, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1081.024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.3807, "queue_k_norm": 1.511, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3311, "sent_len_1": 66.7049, "sent_len_max_0": 127.4788, "sent_len_max_1": 188.3837, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7192, "doc_norm": 1.5146, "encoder_q-embeddings": 554.8509, "encoder_q-layer.0": 374.9146, "encoder_q-layer.1": 397.2758, "encoder_q-layer.10": 664.5069, "encoder_q-layer.11": 1676.3558, "encoder_q-layer.2": 443.8438, "encoder_q-layer.3": 465.8781, "encoder_q-layer.4": 478.8651, "encoder_q-layer.5": 498.4275, "encoder_q-layer.6": 506.6702, "encoder_q-layer.7": 601.3618, "encoder_q-layer.8": 688.7974, "encoder_q-layer.9": 624.7945, "epoch": 0.57, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.0957, "learning_rate": 6.833333333333333e-06, "loss": 3.7192, "norm_diff": 0.1248, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 11.6826, "num_token_query": 31.4173, "num_token_union": 65.1248, "num_word_context": 202.2026, "num_word_doc": 49.8181, "num_word_query": 23.3182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1049.2789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.3898, "queue_k_norm": 1.5121, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4173, "sent_len_1": 66.8275, "sent_len_max_0": 127.535, "sent_len_max_1": 191.3812, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7008, "doc_norm": 1.5083, "encoder_q-embeddings": 665.4104, "encoder_q-layer.0": 433.4972, "encoder_q-layer.1": 466.4271, "encoder_q-layer.10": 691.8027, "encoder_q-layer.11": 1585.2725, "encoder_q-layer.2": 541.3001, "encoder_q-layer.3": 570.1113, "encoder_q-layer.4": 640.8432, "encoder_q-layer.5": 694.9628, "encoder_q-layer.6": 698.8689, "encoder_q-layer.7": 684.557, "encoder_q-layer.8": 682.6864, "encoder_q-layer.9": 601.6251, "epoch": 0.57, "inbatch_neg_score": 0.5219, "inbatch_pos_score": 1.0879, "learning_rate": 6.777777777777779e-06, "loss": 3.7008, "norm_diff": 0.1266, "norm_loss": 0.0, "num_token_doc": 66.7603, "num_token_overlap": 11.7437, "num_token_query": 31.4864, "num_token_union": 65.1487, "num_word_context": 202.2419, "num_word_doc": 49.8363, "num_word_query": 23.3995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1143.3376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.3817, "queue_k_norm": 1.5122, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4864, "sent_len_1": 66.7603, "sent_len_max_0": 127.325, "sent_len_max_1": 189.8775, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.7063, "doc_norm": 1.5128, "encoder_q-embeddings": 645.3307, "encoder_q-layer.0": 436.8174, "encoder_q-layer.1": 495.0234, "encoder_q-layer.10": 606.8719, "encoder_q-layer.11": 1658.9917, "encoder_q-layer.2": 520.7355, "encoder_q-layer.3": 529.0944, "encoder_q-layer.4": 543.1204, "encoder_q-layer.5": 531.3721, "encoder_q-layer.6": 563.6371, "encoder_q-layer.7": 585.6762, "encoder_q-layer.8": 641.5717, "encoder_q-layer.9": 614.0266, "epoch": 0.57, "inbatch_neg_score": 0.5215, "inbatch_pos_score": 1.0938, "learning_rate": 6.722222222222223e-06, "loss": 3.7063, "norm_diff": 0.131, "norm_loss": 0.0, "num_token_doc": 66.8452, "num_token_overlap": 11.7033, "num_token_query": 31.4859, "num_token_union": 65.1797, "num_word_context": 202.5756, "num_word_doc": 49.874, "num_word_query": 23.4053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1117.276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5229, "query_norm": 1.3818, "queue_k_norm": 1.5101, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4859, "sent_len_1": 66.8452, "sent_len_max_0": 127.4525, "sent_len_max_1": 190.4437, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7269, "doc_norm": 1.509, "encoder_q-embeddings": 1821.0234, "encoder_q-layer.0": 1373.9766, "encoder_q-layer.1": 1420.9368, "encoder_q-layer.10": 607.5631, "encoder_q-layer.11": 1510.6576, "encoder_q-layer.2": 1655.6501, "encoder_q-layer.3": 1818.9017, "encoder_q-layer.4": 1867.6844, "encoder_q-layer.5": 1793.1562, "encoder_q-layer.6": 1702.0052, "encoder_q-layer.7": 1514.7646, "encoder_q-layer.8": 1115.8938, "encoder_q-layer.9": 618.2451, "epoch": 0.57, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.1016, "learning_rate": 6.666666666666667e-06, "loss": 3.7269, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.8477, "num_token_overlap": 11.6522, "num_token_query": 31.3539, "num_token_union": 65.207, "num_word_context": 202.5792, "num_word_doc": 49.9377, "num_word_query": 23.2822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2295.8374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.3896, "queue_k_norm": 1.5117, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3539, "sent_len_1": 66.8477, "sent_len_max_0": 127.3975, "sent_len_max_1": 187.34, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.7205, "doc_norm": 1.5133, "encoder_q-embeddings": 555.471, "encoder_q-layer.0": 369.4922, "encoder_q-layer.1": 378.169, "encoder_q-layer.10": 699.3826, "encoder_q-layer.11": 1709.9021, "encoder_q-layer.2": 436.52, "encoder_q-layer.3": 457.3055, "encoder_q-layer.4": 487.2235, "encoder_q-layer.5": 529.5083, "encoder_q-layer.6": 562.9378, "encoder_q-layer.7": 599.0626, "encoder_q-layer.8": 676.0269, "encoder_q-layer.9": 640.3942, "epoch": 0.57, "inbatch_neg_score": 0.5213, "inbatch_pos_score": 1.0664, "learning_rate": 6.611111111111111e-06, "loss": 3.7205, "norm_diff": 0.1358, "norm_loss": 0.0, "num_token_doc": 66.5768, "num_token_overlap": 11.6257, "num_token_query": 31.3582, "num_token_union": 65.0445, "num_word_context": 202.2681, "num_word_doc": 49.6698, "num_word_query": 23.2995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1093.218, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.3776, "queue_k_norm": 1.5104, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3582, "sent_len_1": 66.5768, "sent_len_max_0": 127.485, "sent_len_max_1": 190.9925, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.7175, "doc_norm": 1.516, "encoder_q-embeddings": 882.1557, "encoder_q-layer.0": 601.3246, "encoder_q-layer.1": 672.0997, "encoder_q-layer.10": 703.3846, "encoder_q-layer.11": 1671.8975, "encoder_q-layer.2": 805.3129, "encoder_q-layer.3": 820.6592, "encoder_q-layer.4": 846.5135, "encoder_q-layer.5": 847.1754, "encoder_q-layer.6": 806.2773, "encoder_q-layer.7": 755.7726, "encoder_q-layer.8": 861.074, "encoder_q-layer.9": 633.7898, "epoch": 0.57, "inbatch_neg_score": 0.522, "inbatch_pos_score": 1.1084, "learning_rate": 6.555555555555556e-06, "loss": 3.7175, "norm_diff": 0.1324, "norm_loss": 0.0, "num_token_doc": 66.7666, "num_token_overlap": 11.648, "num_token_query": 31.315, "num_token_union": 65.1131, "num_word_context": 202.1431, "num_word_doc": 49.8898, "num_word_query": 23.2705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1326.9083, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5229, "query_norm": 1.3836, "queue_k_norm": 1.5122, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.315, "sent_len_1": 66.7666, "sent_len_max_0": 127.3988, "sent_len_max_1": 186.7725, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7158, "doc_norm": 1.526, "encoder_q-embeddings": 556.645, "encoder_q-layer.0": 389.8877, "encoder_q-layer.1": 415.7118, "encoder_q-layer.10": 646.2419, "encoder_q-layer.11": 1684.5571, "encoder_q-layer.2": 478.3349, "encoder_q-layer.3": 499.4212, "encoder_q-layer.4": 512.4404, "encoder_q-layer.5": 506.9831, "encoder_q-layer.6": 532.893, "encoder_q-layer.7": 593.5167, "encoder_q-layer.8": 688.104, "encoder_q-layer.9": 639.8083, "epoch": 0.57, "inbatch_neg_score": 0.5231, "inbatch_pos_score": 1.0957, "learning_rate": 6.5000000000000004e-06, "loss": 3.7158, "norm_diff": 0.1434, "norm_loss": 0.0, "num_token_doc": 66.8655, "num_token_overlap": 11.675, "num_token_query": 31.4029, "num_token_union": 65.2008, "num_word_context": 202.2672, "num_word_doc": 49.8632, "num_word_query": 23.3252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1087.8041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5229, "query_norm": 1.3825, "queue_k_norm": 1.5113, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4029, "sent_len_1": 66.8655, "sent_len_max_0": 127.45, "sent_len_max_1": 191.9263, "stdk": 0.0486, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7042, "doc_norm": 1.5141, "encoder_q-embeddings": 528.6478, "encoder_q-layer.0": 367.8508, "encoder_q-layer.1": 405.339, "encoder_q-layer.10": 634.6635, "encoder_q-layer.11": 1595.0696, "encoder_q-layer.2": 478.9215, "encoder_q-layer.3": 528.6944, "encoder_q-layer.4": 543.6481, "encoder_q-layer.5": 592.9857, "encoder_q-layer.6": 662.2864, "encoder_q-layer.7": 652.6388, "encoder_q-layer.8": 662.1702, "encoder_q-layer.9": 574.1339, "epoch": 0.58, "inbatch_neg_score": 0.526, "inbatch_pos_score": 1.1045, "learning_rate": 6.4444444444444445e-06, "loss": 3.7042, "norm_diff": 0.1277, "norm_loss": 0.0, "num_token_doc": 66.5819, "num_token_overlap": 11.656, "num_token_query": 31.4, "num_token_union": 65.0333, "num_word_context": 202.2003, "num_word_doc": 49.658, "num_word_query": 23.3135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1082.4097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5254, "query_norm": 1.3864, "queue_k_norm": 1.5117, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4, "sent_len_1": 66.5819, "sent_len_max_0": 127.3512, "sent_len_max_1": 190.9588, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7299, "doc_norm": 1.508, "encoder_q-embeddings": 523.8679, "encoder_q-layer.0": 335.6263, "encoder_q-layer.1": 342.1978, "encoder_q-layer.10": 663.4146, "encoder_q-layer.11": 1636.6702, "encoder_q-layer.2": 393.6716, "encoder_q-layer.3": 402.9295, "encoder_q-layer.4": 430.09, "encoder_q-layer.5": 434.3048, "encoder_q-layer.6": 460.2412, "encoder_q-layer.7": 525.7813, "encoder_q-layer.8": 628.277, "encoder_q-layer.9": 577.4072, "epoch": 0.58, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.082, "learning_rate": 6.3888888888888885e-06, "loss": 3.7299, "norm_diff": 0.1184, "norm_loss": 0.0, "num_token_doc": 66.6656, "num_token_overlap": 11.66, "num_token_query": 31.4348, "num_token_union": 65.0992, "num_word_context": 202.3357, "num_word_doc": 49.6922, "num_word_query": 23.3496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1023.4905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.3896, "queue_k_norm": 1.5097, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4348, "sent_len_1": 66.6656, "sent_len_max_0": 127.5812, "sent_len_max_1": 191.6587, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7126, "doc_norm": 1.517, "encoder_q-embeddings": 644.3644, "encoder_q-layer.0": 428.8491, "encoder_q-layer.1": 474.0887, "encoder_q-layer.10": 630.7643, "encoder_q-layer.11": 1688.2249, "encoder_q-layer.2": 527.9183, "encoder_q-layer.3": 547.3817, "encoder_q-layer.4": 577.896, "encoder_q-layer.5": 592.1287, "encoder_q-layer.6": 576.8711, "encoder_q-layer.7": 605.8362, "encoder_q-layer.8": 679.4419, "encoder_q-layer.9": 603.0474, "epoch": 0.58, "inbatch_neg_score": 0.5245, "inbatch_pos_score": 1.0938, "learning_rate": 6.333333333333334e-06, "loss": 3.7126, "norm_diff": 0.1231, "norm_loss": 0.0, "num_token_doc": 66.5834, "num_token_overlap": 11.7001, "num_token_query": 31.4546, "num_token_union": 65.0117, "num_word_context": 201.9495, "num_word_doc": 49.6819, "num_word_query": 23.3897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1128.7508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5249, "query_norm": 1.3939, "queue_k_norm": 1.512, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4546, "sent_len_1": 66.5834, "sent_len_max_0": 127.605, "sent_len_max_1": 189.47, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.6971, "doc_norm": 1.5124, "encoder_q-embeddings": 505.6546, "encoder_q-layer.0": 338.1082, "encoder_q-layer.1": 348.9829, "encoder_q-layer.10": 653.066, "encoder_q-layer.11": 1586.1154, "encoder_q-layer.2": 386.2261, "encoder_q-layer.3": 393.3336, "encoder_q-layer.4": 395.7633, "encoder_q-layer.5": 392.3158, "encoder_q-layer.6": 445.8224, "encoder_q-layer.7": 476.1219, "encoder_q-layer.8": 595.8716, "encoder_q-layer.9": 575.1996, "epoch": 0.58, "inbatch_neg_score": 0.5235, "inbatch_pos_score": 1.0947, "learning_rate": 6.277777777777778e-06, "loss": 3.6971, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 66.7553, "num_token_overlap": 11.6908, "num_token_query": 31.4731, "num_token_union": 65.143, "num_word_context": 202.6797, "num_word_doc": 49.8313, "num_word_query": 23.3875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 977.3548, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5244, "query_norm": 1.3839, "queue_k_norm": 1.513, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4731, "sent_len_1": 66.7553, "sent_len_max_0": 127.6112, "sent_len_max_1": 191.6463, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.7044, "doc_norm": 1.5122, "encoder_q-embeddings": 1172.8896, "encoder_q-layer.0": 792.3914, "encoder_q-layer.1": 860.4366, "encoder_q-layer.10": 1331.5598, "encoder_q-layer.11": 3303.9797, "encoder_q-layer.2": 966.2206, "encoder_q-layer.3": 998.0363, "encoder_q-layer.4": 1017.7473, "encoder_q-layer.5": 1078.1184, "encoder_q-layer.6": 1098.6345, "encoder_q-layer.7": 1132.426, "encoder_q-layer.8": 1312.2767, "encoder_q-layer.9": 1248.3132, "epoch": 0.58, "inbatch_neg_score": 0.5241, "inbatch_pos_score": 1.082, "learning_rate": 6.222222222222222e-06, "loss": 3.7044, "norm_diff": 0.1236, "norm_loss": 0.0, "num_token_doc": 66.8977, "num_token_overlap": 11.6546, "num_token_query": 31.3493, "num_token_union": 65.1629, "num_word_context": 202.6063, "num_word_doc": 49.934, "num_word_query": 23.2781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2185.5564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5244, "query_norm": 1.3886, "queue_k_norm": 1.5119, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3493, "sent_len_1": 66.8977, "sent_len_max_0": 127.4963, "sent_len_max_1": 191.6738, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7053, "doc_norm": 1.5148, "encoder_q-embeddings": 1068.3403, "encoder_q-layer.0": 709.6921, "encoder_q-layer.1": 723.789, "encoder_q-layer.10": 1250.4681, "encoder_q-layer.11": 3364.5752, "encoder_q-layer.2": 825.7198, "encoder_q-layer.3": 873.8423, "encoder_q-layer.4": 941.858, "encoder_q-layer.5": 952.4515, "encoder_q-layer.6": 1053.8541, "encoder_q-layer.7": 1192.6611, "encoder_q-layer.8": 1273.3363, "encoder_q-layer.9": 1197.4008, "epoch": 0.58, "inbatch_neg_score": 0.5276, "inbatch_pos_score": 1.0967, "learning_rate": 6.166666666666667e-06, "loss": 3.7053, "norm_diff": 0.1279, "norm_loss": 0.0, "num_token_doc": 66.7343, "num_token_overlap": 11.6656, "num_token_query": 31.4518, "num_token_union": 65.1399, "num_word_context": 202.1997, "num_word_doc": 49.8054, "num_word_query": 23.3744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2141.578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.3868, "queue_k_norm": 1.5145, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4518, "sent_len_1": 66.7343, "sent_len_max_0": 127.5113, "sent_len_max_1": 190.2163, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.7229, "doc_norm": 1.5123, "encoder_q-embeddings": 1909.8412, "encoder_q-layer.0": 1444.5348, "encoder_q-layer.1": 1547.417, "encoder_q-layer.10": 1403.293, "encoder_q-layer.11": 3417.3579, "encoder_q-layer.2": 1593.9976, "encoder_q-layer.3": 1603.2511, "encoder_q-layer.4": 1660.4832, "encoder_q-layer.5": 1826.3137, "encoder_q-layer.6": 1706.9165, "encoder_q-layer.7": 1764.9774, "encoder_q-layer.8": 1441.9771, "encoder_q-layer.9": 1246.1613, "epoch": 0.58, "inbatch_neg_score": 0.5273, "inbatch_pos_score": 1.0889, "learning_rate": 6.111111111111111e-06, "loss": 3.7229, "norm_diff": 0.1313, "norm_loss": 0.0, "num_token_doc": 66.6965, "num_token_overlap": 11.6355, "num_token_query": 31.3845, "num_token_union": 65.165, "num_word_context": 202.3269, "num_word_doc": 49.7802, "num_word_query": 23.3015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2766.2081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.3809, "queue_k_norm": 1.5128, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3845, "sent_len_1": 66.6965, "sent_len_max_0": 127.4488, "sent_len_max_1": 187.5375, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.6954, "doc_norm": 1.5117, "encoder_q-embeddings": 1141.7322, "encoder_q-layer.0": 765.2631, "encoder_q-layer.1": 809.4839, "encoder_q-layer.10": 1348.8055, "encoder_q-layer.11": 3385.606, "encoder_q-layer.2": 913.2234, "encoder_q-layer.3": 984.3002, "encoder_q-layer.4": 1045.2478, "encoder_q-layer.5": 1096.6954, "encoder_q-layer.6": 1096.2571, "encoder_q-layer.7": 1175.3805, "encoder_q-layer.8": 1419.8573, "encoder_q-layer.9": 1259.0447, "epoch": 0.58, "inbatch_neg_score": 0.5278, "inbatch_pos_score": 1.0947, "learning_rate": 6.055555555555556e-06, "loss": 3.6954, "norm_diff": 0.1296, "norm_loss": 0.0, "num_token_doc": 66.7368, "num_token_overlap": 11.68, "num_token_query": 31.44, "num_token_union": 65.1082, "num_word_context": 202.1473, "num_word_doc": 49.8075, "num_word_query": 23.3592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2191.6689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.3822, "queue_k_norm": 1.5133, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.44, "sent_len_1": 66.7368, "sent_len_max_0": 127.4363, "sent_len_max_1": 189.61, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7178, "doc_norm": 1.5106, "encoder_q-embeddings": 1783.3822, "encoder_q-layer.0": 1278.3132, "encoder_q-layer.1": 1443.3503, "encoder_q-layer.10": 1335.7238, "encoder_q-layer.11": 3189.9229, "encoder_q-layer.2": 1657.9572, "encoder_q-layer.3": 1688.506, "encoder_q-layer.4": 1850.8058, "encoder_q-layer.5": 1875.8104, "encoder_q-layer.6": 1600.5299, "encoder_q-layer.7": 1477.3694, "encoder_q-layer.8": 1437.3995, "encoder_q-layer.9": 1212.1001, "epoch": 0.58, "inbatch_neg_score": 0.5271, "inbatch_pos_score": 1.0918, "learning_rate": 6e-06, "loss": 3.7178, "norm_diff": 0.1261, "norm_loss": 0.0, "num_token_doc": 66.8752, "num_token_overlap": 11.6576, "num_token_query": 31.3966, "num_token_union": 65.2574, "num_word_context": 202.49, "num_word_doc": 49.9696, "num_word_query": 23.3192, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2625.2247, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.3844, "queue_k_norm": 1.5132, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3966, "sent_len_1": 66.8752, "sent_len_max_0": 127.3838, "sent_len_max_1": 187.8438, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7016, "doc_norm": 1.5205, "encoder_q-embeddings": 2305.2568, "encoder_q-layer.0": 1582.4459, "encoder_q-layer.1": 1792.1621, "encoder_q-layer.10": 1238.1207, "encoder_q-layer.11": 3147.4919, "encoder_q-layer.2": 2076.0784, "encoder_q-layer.3": 2299.4492, "encoder_q-layer.4": 2366.6016, "encoder_q-layer.5": 2455.4629, "encoder_q-layer.6": 2324.0425, "encoder_q-layer.7": 2030.9989, "encoder_q-layer.8": 1579.134, "encoder_q-layer.9": 1190.5944, "epoch": 0.58, "inbatch_neg_score": 0.5264, "inbatch_pos_score": 1.1113, "learning_rate": 5.944444444444445e-06, "loss": 3.7016, "norm_diff": 0.1276, "norm_loss": 0.0, "num_token_doc": 66.526, "num_token_overlap": 11.7098, "num_token_query": 31.4462, "num_token_union": 64.9618, "num_word_context": 201.5565, "num_word_doc": 49.6436, "num_word_query": 23.3294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3191.1775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.3929, "queue_k_norm": 1.514, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4462, "sent_len_1": 66.526, "sent_len_max_0": 127.525, "sent_len_max_1": 189.3725, "stdk": 0.0484, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.697, "doc_norm": 1.5103, "encoder_q-embeddings": 1527.8534, "encoder_q-layer.0": 1087.8119, "encoder_q-layer.1": 1155.8169, "encoder_q-layer.10": 1350.3179, "encoder_q-layer.11": 3291.041, "encoder_q-layer.2": 1326.2014, "encoder_q-layer.3": 1392.5271, "encoder_q-layer.4": 1440.1257, "encoder_q-layer.5": 1490.7118, "encoder_q-layer.6": 1456.2834, "encoder_q-layer.7": 1604.5177, "encoder_q-layer.8": 1433.4791, "encoder_q-layer.9": 1283.1533, "epoch": 0.58, "inbatch_neg_score": 0.5275, "inbatch_pos_score": 1.0996, "learning_rate": 5.888888888888889e-06, "loss": 3.697, "norm_diff": 0.1314, "norm_loss": 0.0, "num_token_doc": 66.8295, "num_token_overlap": 11.7005, "num_token_query": 31.2715, "num_token_union": 65.0857, "num_word_context": 202.2677, "num_word_doc": 49.8623, "num_word_query": 23.2101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2483.6943, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.3789, "queue_k_norm": 1.5146, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2715, "sent_len_1": 66.8295, "sent_len_max_0": 127.4912, "sent_len_max_1": 187.2562, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.7273, "doc_norm": 1.5152, "encoder_q-embeddings": 1008.8066, "encoder_q-layer.0": 679.3317, "encoder_q-layer.1": 712.6242, "encoder_q-layer.10": 1530.6053, "encoder_q-layer.11": 3564.6421, "encoder_q-layer.2": 776.2907, "encoder_q-layer.3": 797.3566, "encoder_q-layer.4": 847.7545, "encoder_q-layer.5": 886.6481, "encoder_q-layer.6": 1014.9483, "encoder_q-layer.7": 1140.7694, "encoder_q-layer.8": 1349.7753, "encoder_q-layer.9": 1262.376, "epoch": 0.58, "inbatch_neg_score": 0.5289, "inbatch_pos_score": 1.0898, "learning_rate": 5.833333333333334e-06, "loss": 3.7273, "norm_diff": 0.1336, "norm_loss": 0.0, "num_token_doc": 66.9582, "num_token_overlap": 11.6475, "num_token_query": 31.3325, "num_token_union": 65.2541, "num_word_context": 202.6999, "num_word_doc": 49.9841, "num_word_query": 23.2698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2157.3991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5283, "query_norm": 1.3816, "queue_k_norm": 1.5157, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3325, "sent_len_1": 66.9582, "sent_len_max_0": 127.3612, "sent_len_max_1": 188.7625, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.6908, "doc_norm": 1.513, "encoder_q-embeddings": 1296.3765, "encoder_q-layer.0": 896.5934, "encoder_q-layer.1": 958.2036, "encoder_q-layer.10": 1401.4596, "encoder_q-layer.11": 3395.2969, "encoder_q-layer.2": 1104.2781, "encoder_q-layer.3": 1150.0056, "encoder_q-layer.4": 1289.3478, "encoder_q-layer.5": 1254.434, "encoder_q-layer.6": 1223.8743, "encoder_q-layer.7": 1231.1417, "encoder_q-layer.8": 1288.8589, "encoder_q-layer.9": 1155.9674, "epoch": 0.58, "inbatch_neg_score": 0.5304, "inbatch_pos_score": 1.0811, "learning_rate": 5.777777777777778e-06, "loss": 3.6908, "norm_diff": 0.1341, "norm_loss": 0.0, "num_token_doc": 66.5979, "num_token_overlap": 11.6683, "num_token_query": 31.4409, "num_token_union": 65.0531, "num_word_context": 202.003, "num_word_doc": 49.7078, "num_word_query": 23.3404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2330.2718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5293, "query_norm": 1.3788, "queue_k_norm": 1.5145, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4409, "sent_len_1": 66.5979, "sent_len_max_0": 127.4363, "sent_len_max_1": 192.1262, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.7061, "doc_norm": 1.5144, "encoder_q-embeddings": 985.668, "encoder_q-layer.0": 654.0012, "encoder_q-layer.1": 691.5536, "encoder_q-layer.10": 1217.1537, "encoder_q-layer.11": 3040.5044, "encoder_q-layer.2": 778.7175, "encoder_q-layer.3": 782.2424, "encoder_q-layer.4": 813.5385, "encoder_q-layer.5": 839.825, "encoder_q-layer.6": 903.8777, "encoder_q-layer.7": 1017.5026, "encoder_q-layer.8": 1145.9008, "encoder_q-layer.9": 1111.5796, "epoch": 0.58, "inbatch_neg_score": 0.5303, "inbatch_pos_score": 1.1094, "learning_rate": 5.722222222222223e-06, "loss": 3.7061, "norm_diff": 0.1293, "norm_loss": 0.0, "num_token_doc": 66.8637, "num_token_overlap": 11.6982, "num_token_query": 31.4779, "num_token_union": 65.2521, "num_word_context": 202.4868, "num_word_doc": 49.9141, "num_word_query": 23.3881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1935.6186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5312, "query_norm": 1.3851, "queue_k_norm": 1.5163, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4779, "sent_len_1": 66.8637, "sent_len_max_0": 127.585, "sent_len_max_1": 188.71, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.6962, "doc_norm": 1.5078, "encoder_q-embeddings": 1176.3507, "encoder_q-layer.0": 788.1022, "encoder_q-layer.1": 861.8353, "encoder_q-layer.10": 1249.2769, "encoder_q-layer.11": 3296.8621, "encoder_q-layer.2": 974.6882, "encoder_q-layer.3": 1014.5847, "encoder_q-layer.4": 1110.3726, "encoder_q-layer.5": 1152.7288, "encoder_q-layer.6": 1240.3413, "encoder_q-layer.7": 1307.7991, "encoder_q-layer.8": 1391.0209, "encoder_q-layer.9": 1199.0334, "epoch": 0.58, "inbatch_neg_score": 0.5332, "inbatch_pos_score": 1.1035, "learning_rate": 5.666666666666667e-06, "loss": 3.6962, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.7657, "num_token_overlap": 11.6654, "num_token_query": 31.3608, "num_token_union": 65.1038, "num_word_context": 202.3998, "num_word_doc": 49.7963, "num_word_query": 23.2947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2234.8672, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5332, "query_norm": 1.3899, "queue_k_norm": 1.5156, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3608, "sent_len_1": 66.7657, "sent_len_max_0": 127.2687, "sent_len_max_1": 190.015, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7074, "doc_norm": 1.5147, "encoder_q-embeddings": 984.7834, "encoder_q-layer.0": 654.7401, "encoder_q-layer.1": 676.3492, "encoder_q-layer.10": 1363.3622, "encoder_q-layer.11": 3202.4148, "encoder_q-layer.2": 753.4749, "encoder_q-layer.3": 765.1666, "encoder_q-layer.4": 839.5276, "encoder_q-layer.5": 846.6758, "encoder_q-layer.6": 913.1571, "encoder_q-layer.7": 1072.5131, "encoder_q-layer.8": 1248.3828, "encoder_q-layer.9": 1183.3545, "epoch": 0.59, "inbatch_neg_score": 0.5341, "inbatch_pos_score": 1.1045, "learning_rate": 5.611111111111112e-06, "loss": 3.7074, "norm_diff": 0.119, "norm_loss": 0.0, "num_token_doc": 66.6961, "num_token_overlap": 11.6714, "num_token_query": 31.3841, "num_token_union": 65.0887, "num_word_context": 202.4324, "num_word_doc": 49.8117, "num_word_query": 23.3131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2003.1844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5337, "query_norm": 1.3957, "queue_k_norm": 1.5154, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3841, "sent_len_1": 66.6961, "sent_len_max_0": 127.535, "sent_len_max_1": 188.3013, "stdk": 0.048, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.7067, "doc_norm": 1.5191, "encoder_q-embeddings": 1398.6964, "encoder_q-layer.0": 949.2985, "encoder_q-layer.1": 1027.7012, "encoder_q-layer.10": 1253.6423, "encoder_q-layer.11": 3311.9116, "encoder_q-layer.2": 1128.7856, "encoder_q-layer.3": 1140.5184, "encoder_q-layer.4": 1166.6947, "encoder_q-layer.5": 1186.7863, "encoder_q-layer.6": 1113.2109, "encoder_q-layer.7": 1207.8485, "encoder_q-layer.8": 1345.948, "encoder_q-layer.9": 1220.0571, "epoch": 0.59, "inbatch_neg_score": 0.5352, "inbatch_pos_score": 1.1094, "learning_rate": 5.555555555555556e-06, "loss": 3.7067, "norm_diff": 0.1303, "norm_loss": 0.0, "num_token_doc": 66.7537, "num_token_overlap": 11.7207, "num_token_query": 31.4256, "num_token_union": 65.1221, "num_word_context": 202.1421, "num_word_doc": 49.8563, "num_word_query": 23.35, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2264.5735, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5342, "query_norm": 1.3889, "queue_k_norm": 1.5158, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4256, "sent_len_1": 66.7537, "sent_len_max_0": 127.475, "sent_len_max_1": 186.1287, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 30.3458, "dev_samples_per_second": 2.109, "dev_steps_per_second": 0.033, "epoch": 0.59, "step": 90000, "test_accuracy": 93.310546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3904704749584198, "test_doc_norm": 1.4753453731536865, "test_inbatch_neg_score": 0.8552895784378052, "test_inbatch_pos_score": 1.746917963027954, "test_loss": 0.3904704749584198, "test_loss_align": 0.9102513790130615, "test_loss_unif": 3.46409273147583, "test_loss_unif_q@queue": 3.46409273147583, "test_norm_diff": 0.010375287383794785, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5187356472015381, "test_query_norm": 1.483202576637268, "test_queue_k_norm": 1.5164079666137695, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135643690824509, "test_stdq": 0.04137412831187248, "test_stdqueue_k": 0.04821513220667839, "test_stdqueue_q": 0.0 }, { "dev_runtime": 30.3458, "dev_samples_per_second": 2.109, "dev_steps_per_second": 0.033, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.38992, "eval_beir-arguana_recall@10": 0.66145, "eval_beir-arguana_recall@100": 0.95092, "eval_beir-arguana_recall@20": 0.81081, "eval_beir-avg_ndcg@10": 0.37941458333333333, "eval_beir-avg_recall@10": 0.45313516666666664, "eval_beir-avg_recall@100": 0.6383386666666667, "eval_beir-avg_recall@20": 0.5185475, "eval_beir-cqadupstack_ndcg@10": 0.26161583333333327, "eval_beir-cqadupstack_recall@10": 0.3589316666666666, "eval_beir-cqadupstack_recall@100": 0.5957366666666667, "eval_beir-cqadupstack_recall@20": 0.429765, "eval_beir-fiqa_ndcg@10": 0.24876, "eval_beir-fiqa_recall@10": 0.31212, "eval_beir-fiqa_recall@100": 0.58633, "eval_beir-fiqa_recall@20": 0.39197, "eval_beir-nfcorpus_ndcg@10": 0.30373, "eval_beir-nfcorpus_recall@10": 0.14968, "eval_beir-nfcorpus_recall@100": 0.29652, "eval_beir-nfcorpus_recall@20": 0.18749, "eval_beir-nq_ndcg@10": 0.2762, "eval_beir-nq_recall@10": 0.45971, "eval_beir-nq_recall@100": 0.80267, "eval_beir-nq_recall@20": 0.58167, "eval_beir-quora_ndcg@10": 0.77544, "eval_beir-quora_recall@10": 0.88562, "eval_beir-quora_recall@100": 0.97753, "eval_beir-quora_recall@20": 0.92784, "eval_beir-scidocs_ndcg@10": 0.15428, "eval_beir-scidocs_recall@10": 0.16523, "eval_beir-scidocs_recall@100": 0.36568, "eval_beir-scidocs_recall@20": 0.22053, "eval_beir-scifact_ndcg@10": 0.63651, "eval_beir-scifact_recall@10": 0.79622, "eval_beir-scifact_recall@100": 0.91311, "eval_beir-scifact_recall@20": 0.83467, "eval_beir-trec-covid_ndcg@10": 0.5679, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.4582, "eval_beir-trec-covid_recall@20": 0.587, "eval_beir-webis-touche2020_ndcg@10": 0.17979, "eval_beir-webis-touche2020_recall@10": 0.13039, "eval_beir-webis-touche2020_recall@100": 0.43669, "eval_beir-webis-touche2020_recall@20": 0.21373, "eval_senteval-avg_sts": 0.7584010574585223, "eval_senteval-sickr_spearman": 0.7238376804883266, "eval_senteval-stsb_spearman": 0.792964434428718, "step": 90000, "test_accuracy": 93.310546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3904704749584198, "test_doc_norm": 1.4753453731536865, "test_inbatch_neg_score": 0.8552895784378052, "test_inbatch_pos_score": 1.746917963027954, "test_loss": 0.3904704749584198, "test_loss_align": 0.9102513790130615, "test_loss_unif": 3.46409273147583, "test_loss_unif_q@queue": 3.46409273147583, "test_norm_diff": 0.010375287383794785, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5187356472015381, "test_query_norm": 1.483202576637268, "test_queue_k_norm": 1.5164079666137695, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135643690824509, "test_stdq": 0.04137412831187248, "test_stdqueue_k": 0.04821513220667839, "test_stdqueue_q": 0.0 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.6987, "doc_norm": 1.5139, "encoder_q-embeddings": 1495.6886, "encoder_q-layer.0": 1046.0632, "encoder_q-layer.1": 1194.2623, "encoder_q-layer.10": 1199.9791, "encoder_q-layer.11": 3242.9014, "encoder_q-layer.2": 1467.2371, "encoder_q-layer.3": 1564.3469, "encoder_q-layer.4": 1519.761, "encoder_q-layer.5": 1609.4479, "encoder_q-layer.6": 1730.9724, "encoder_q-layer.7": 1560.0961, "encoder_q-layer.8": 1477.8748, "encoder_q-layer.9": 1155.7218, "epoch": 0.59, "inbatch_neg_score": 0.5369, "inbatch_pos_score": 1.0947, "learning_rate": 5.500000000000001e-06, "loss": 3.6987, "norm_diff": 0.1322, "norm_loss": 0.0, "num_token_doc": 66.8847, "num_token_overlap": 11.7344, "num_token_query": 31.5606, "num_token_union": 65.2496, "num_word_context": 202.6197, "num_word_doc": 49.8957, "num_word_query": 23.4357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2536.6038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.3817, "queue_k_norm": 1.5146, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.5606, "sent_len_1": 66.8847, "sent_len_max_0": 127.5, "sent_len_max_1": 188.8762, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7378, "doc_norm": 1.5162, "encoder_q-embeddings": 1206.5687, "encoder_q-layer.0": 838.105, "encoder_q-layer.1": 863.6204, "encoder_q-layer.10": 1241.3018, "encoder_q-layer.11": 3233.9939, "encoder_q-layer.2": 952.8192, "encoder_q-layer.3": 986.9636, "encoder_q-layer.4": 1021.1458, "encoder_q-layer.5": 1017.715, "encoder_q-layer.6": 1079.3438, "encoder_q-layer.7": 1106.219, "encoder_q-layer.8": 1278.9462, "encoder_q-layer.9": 1179.0123, "epoch": 0.59, "inbatch_neg_score": 0.533, "inbatch_pos_score": 1.0957, "learning_rate": 5.444444444444445e-06, "loss": 3.7378, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 11.6352, "num_token_query": 31.3708, "num_token_union": 65.1468, "num_word_context": 202.4175, "num_word_doc": 49.8219, "num_word_query": 23.2891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2170.2619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5337, "query_norm": 1.3857, "queue_k_norm": 1.517, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3708, "sent_len_1": 66.7256, "sent_len_max_0": 127.3863, "sent_len_max_1": 187.855, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.6857, "doc_norm": 1.5165, "encoder_q-embeddings": 1002.8967, "encoder_q-layer.0": 681.3776, "encoder_q-layer.1": 733.1483, "encoder_q-layer.10": 1282.417, "encoder_q-layer.11": 3243.0566, "encoder_q-layer.2": 830.3594, "encoder_q-layer.3": 858.6051, "encoder_q-layer.4": 877.8461, "encoder_q-layer.5": 855.1536, "encoder_q-layer.6": 931.9052, "encoder_q-layer.7": 1009.1345, "encoder_q-layer.8": 1200.0747, "encoder_q-layer.9": 1150.2473, "epoch": 0.59, "inbatch_neg_score": 0.5346, "inbatch_pos_score": 1.0986, "learning_rate": 5.388888888888889e-06, "loss": 3.6857, "norm_diff": 0.1471, "norm_loss": 0.0, "num_token_doc": 66.7248, "num_token_overlap": 11.6697, "num_token_query": 31.3213, "num_token_union": 65.0872, "num_word_context": 202.2926, "num_word_doc": 49.7979, "num_word_query": 23.2576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2040.755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.3694, "queue_k_norm": 1.5167, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3213, "sent_len_1": 66.7248, "sent_len_max_0": 127.4188, "sent_len_max_1": 189.5662, "stdk": 0.0481, "stdq": 0.0415, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.6886, "doc_norm": 1.5125, "encoder_q-embeddings": 1003.5544, "encoder_q-layer.0": 671.864, "encoder_q-layer.1": 695.2519, "encoder_q-layer.10": 1337.5498, "encoder_q-layer.11": 3216.7483, "encoder_q-layer.2": 785.1219, "encoder_q-layer.3": 822.0854, "encoder_q-layer.4": 858.9465, "encoder_q-layer.5": 894.1683, "encoder_q-layer.6": 999.5524, "encoder_q-layer.7": 1129.6893, "encoder_q-layer.8": 1380.23, "encoder_q-layer.9": 1251.2181, "epoch": 0.59, "inbatch_neg_score": 0.5386, "inbatch_pos_score": 1.1074, "learning_rate": 5.333333333333334e-06, "loss": 3.6886, "norm_diff": 0.1273, "norm_loss": 0.0, "num_token_doc": 66.755, "num_token_overlap": 11.7265, "num_token_query": 31.4977, "num_token_union": 65.1481, "num_word_context": 202.2411, "num_word_doc": 49.8549, "num_word_query": 23.4068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.9619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5381, "query_norm": 1.3852, "queue_k_norm": 1.5157, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4977, "sent_len_1": 66.755, "sent_len_max_0": 127.5037, "sent_len_max_1": 188.4863, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7049, "doc_norm": 1.5147, "encoder_q-embeddings": 1235.718, "encoder_q-layer.0": 812.9714, "encoder_q-layer.1": 884.5464, "encoder_q-layer.10": 1348.0194, "encoder_q-layer.11": 3161.2432, "encoder_q-layer.2": 1027.1825, "encoder_q-layer.3": 1055.4347, "encoder_q-layer.4": 1136.7018, "encoder_q-layer.5": 1210.4351, "encoder_q-layer.6": 1255.7551, "encoder_q-layer.7": 1268.3605, "encoder_q-layer.8": 1358.348, "encoder_q-layer.9": 1216.1783, "epoch": 0.59, "inbatch_neg_score": 0.5367, "inbatch_pos_score": 1.1162, "learning_rate": 5.277777777777778e-06, "loss": 3.7049, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.8329, "num_token_overlap": 11.6673, "num_token_query": 31.3019, "num_token_union": 65.1307, "num_word_context": 202.4169, "num_word_doc": 49.8955, "num_word_query": 23.2524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2187.1073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5371, "query_norm": 1.3977, "queue_k_norm": 1.5162, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3019, "sent_len_1": 66.8329, "sent_len_max_0": 127.3725, "sent_len_max_1": 189.1538, "stdk": 0.048, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.7059, "doc_norm": 1.5187, "encoder_q-embeddings": 1240.6616, "encoder_q-layer.0": 847.5731, "encoder_q-layer.1": 945.3727, "encoder_q-layer.10": 1223.9758, "encoder_q-layer.11": 3290.4956, "encoder_q-layer.2": 1076.5004, "encoder_q-layer.3": 1097.8232, "encoder_q-layer.4": 1128.4501, "encoder_q-layer.5": 1095.4111, "encoder_q-layer.6": 1161.1215, "encoder_q-layer.7": 1138.4229, "encoder_q-layer.8": 1252.5634, "encoder_q-layer.9": 1148.2727, "epoch": 0.59, "inbatch_neg_score": 0.5426, "inbatch_pos_score": 1.1143, "learning_rate": 5.2222222222222226e-06, "loss": 3.7059, "norm_diff": 0.1331, "norm_loss": 0.0, "num_token_doc": 66.836, "num_token_overlap": 11.7531, "num_token_query": 31.5812, "num_token_union": 65.2023, "num_word_context": 202.7254, "num_word_doc": 49.8825, "num_word_query": 23.4629, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.8248, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5415, "query_norm": 1.3856, "queue_k_norm": 1.5165, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5812, "sent_len_1": 66.836, "sent_len_max_0": 127.6275, "sent_len_max_1": 192.2637, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7043, "doc_norm": 1.5157, "encoder_q-embeddings": 1743.1993, "encoder_q-layer.0": 1242.2356, "encoder_q-layer.1": 1355.6189, "encoder_q-layer.10": 1399.626, "encoder_q-layer.11": 3353.9146, "encoder_q-layer.2": 1532.2264, "encoder_q-layer.3": 1542.6993, "encoder_q-layer.4": 1637.3165, "encoder_q-layer.5": 1632.0579, "encoder_q-layer.6": 1317.1628, "encoder_q-layer.7": 1224.2513, "encoder_q-layer.8": 1412.5712, "encoder_q-layer.9": 1220.9589, "epoch": 0.59, "inbatch_neg_score": 0.5371, "inbatch_pos_score": 1.1299, "learning_rate": 5.166666666666667e-06, "loss": 3.7043, "norm_diff": 0.1225, "norm_loss": 0.0, "num_token_doc": 66.7066, "num_token_overlap": 11.7223, "num_token_query": 31.4885, "num_token_union": 65.1083, "num_word_context": 202.446, "num_word_doc": 49.8013, "num_word_query": 23.3819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2550.4471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5381, "query_norm": 1.3933, "queue_k_norm": 1.5176, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4885, "sent_len_1": 66.7066, "sent_len_max_0": 127.4375, "sent_len_max_1": 188.8837, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7078, "doc_norm": 1.5108, "encoder_q-embeddings": 3039.9333, "encoder_q-layer.0": 2100.563, "encoder_q-layer.1": 2376.135, "encoder_q-layer.10": 2500.5295, "encoder_q-layer.11": 6628.5273, "encoder_q-layer.2": 2714.8428, "encoder_q-layer.3": 2829.0198, "encoder_q-layer.4": 3153.2192, "encoder_q-layer.5": 3214.4297, "encoder_q-layer.6": 2841.9014, "encoder_q-layer.7": 2596.7236, "encoder_q-layer.8": 2692.9441, "encoder_q-layer.9": 2370.906, "epoch": 0.59, "inbatch_neg_score": 0.5423, "inbatch_pos_score": 1.0986, "learning_rate": 5.1111111111111115e-06, "loss": 3.7078, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 66.7147, "num_token_overlap": 11.6358, "num_token_query": 31.2872, "num_token_union": 65.1241, "num_word_context": 202.1636, "num_word_doc": 49.8392, "num_word_query": 23.2417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4974.5827, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.541, "query_norm": 1.3948, "queue_k_norm": 1.5168, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2872, "sent_len_1": 66.7147, "sent_len_max_0": 127.5837, "sent_len_max_1": 187.5863, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.716, "doc_norm": 1.522, "encoder_q-embeddings": 1901.2855, "encoder_q-layer.0": 1283.9708, "encoder_q-layer.1": 1376.7893, "encoder_q-layer.10": 2480.7046, "encoder_q-layer.11": 6292.7607, "encoder_q-layer.2": 1569.9635, "encoder_q-layer.3": 1635.9611, "encoder_q-layer.4": 1716.0836, "encoder_q-layer.5": 1816.8997, "encoder_q-layer.6": 1884.8259, "encoder_q-layer.7": 2095.8098, "encoder_q-layer.8": 2450.7449, "encoder_q-layer.9": 2251.3889, "epoch": 0.59, "inbatch_neg_score": 0.5412, "inbatch_pos_score": 1.1318, "learning_rate": 5.0555555555555555e-06, "loss": 3.716, "norm_diff": 0.121, "norm_loss": 0.0, "num_token_doc": 66.745, "num_token_overlap": 11.6249, "num_token_query": 31.3364, "num_token_union": 65.163, "num_word_context": 202.3577, "num_word_doc": 49.8271, "num_word_query": 23.2668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3987.454, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.541, "query_norm": 1.4009, "queue_k_norm": 1.5188, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3364, "sent_len_1": 66.745, "sent_len_max_0": 127.505, "sent_len_max_1": 188.4712, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7033, "doc_norm": 1.5211, "encoder_q-embeddings": 2983.7576, "encoder_q-layer.0": 2156.9568, "encoder_q-layer.1": 2367.0015, "encoder_q-layer.10": 2720.8745, "encoder_q-layer.11": 6817.1763, "encoder_q-layer.2": 2880.3533, "encoder_q-layer.3": 2916.1216, "encoder_q-layer.4": 3033.4504, "encoder_q-layer.5": 2904.9775, "encoder_q-layer.6": 2778.7695, "encoder_q-layer.7": 2797.3628, "encoder_q-layer.8": 2922.3057, "encoder_q-layer.9": 2518.7537, "epoch": 0.59, "inbatch_neg_score": 0.5439, "inbatch_pos_score": 1.1094, "learning_rate": 5e-06, "loss": 3.7033, "norm_diff": 0.1191, "norm_loss": 0.0, "num_token_doc": 66.9216, "num_token_overlap": 11.7267, "num_token_query": 31.4223, "num_token_union": 65.1604, "num_word_context": 202.6003, "num_word_doc": 49.9371, "num_word_query": 23.3473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5074.7701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.543, "query_norm": 1.402, "queue_k_norm": 1.52, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4223, "sent_len_1": 66.9216, "sent_len_max_0": 127.545, "sent_len_max_1": 190.4712, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7054, "doc_norm": 1.5113, "encoder_q-embeddings": 2024.7001, "encoder_q-layer.0": 1337.1154, "encoder_q-layer.1": 1389.2922, "encoder_q-layer.10": 2497.8721, "encoder_q-layer.11": 6477.7866, "encoder_q-layer.2": 1598.8771, "encoder_q-layer.3": 1640.3921, "encoder_q-layer.4": 1751.3571, "encoder_q-layer.5": 1782.9382, "encoder_q-layer.6": 1944.329, "encoder_q-layer.7": 2016.9688, "encoder_q-layer.8": 2401.6406, "encoder_q-layer.9": 2209.4143, "epoch": 0.59, "inbatch_neg_score": 0.5418, "inbatch_pos_score": 1.1191, "learning_rate": 4.9444444444444444e-06, "loss": 3.7054, "norm_diff": 0.1255, "norm_loss": 0.0, "num_token_doc": 66.8652, "num_token_overlap": 11.6985, "num_token_query": 31.3487, "num_token_union": 65.1637, "num_word_context": 202.3833, "num_word_doc": 49.8979, "num_word_query": 23.2982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4029.0459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5425, "query_norm": 1.3859, "queue_k_norm": 1.5193, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3487, "sent_len_1": 66.8652, "sent_len_max_0": 127.55, "sent_len_max_1": 189.4675, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.707, "doc_norm": 1.519, "encoder_q-embeddings": 2125.1838, "encoder_q-layer.0": 1394.1331, "encoder_q-layer.1": 1526.5894, "encoder_q-layer.10": 2525.5996, "encoder_q-layer.11": 6462.4102, "encoder_q-layer.2": 1831.916, "encoder_q-layer.3": 1914.1772, "encoder_q-layer.4": 1957.5028, "encoder_q-layer.5": 2017.9791, "encoder_q-layer.6": 2274.9692, "encoder_q-layer.7": 2433.9143, "encoder_q-layer.8": 2466.3401, "encoder_q-layer.9": 2246.4248, "epoch": 0.59, "inbatch_neg_score": 0.5425, "inbatch_pos_score": 1.1279, "learning_rate": 4.888888888888889e-06, "loss": 3.707, "norm_diff": 0.1204, "norm_loss": 0.0, "num_token_doc": 66.8409, "num_token_overlap": 11.691, "num_token_query": 31.4857, "num_token_union": 65.2089, "num_word_context": 202.4924, "num_word_doc": 49.8642, "num_word_query": 23.3877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4206.0294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.543, "query_norm": 1.3986, "queue_k_norm": 1.5192, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4857, "sent_len_1": 66.8409, "sent_len_max_0": 127.3937, "sent_len_max_1": 190.6975, "stdk": 0.0481, "stdq": 0.0428, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.7059, "doc_norm": 1.5189, "encoder_q-embeddings": 2642.2119, "encoder_q-layer.0": 1902.222, "encoder_q-layer.1": 1931.3889, "encoder_q-layer.10": 2635.927, "encoder_q-layer.11": 6806.873, "encoder_q-layer.2": 2117.4836, "encoder_q-layer.3": 2110.1165, "encoder_q-layer.4": 2029.3129, "encoder_q-layer.5": 2036.3837, "encoder_q-layer.6": 2126.1829, "encoder_q-layer.7": 2242.7178, "encoder_q-layer.8": 2745.9241, "encoder_q-layer.9": 2416.0007, "epoch": 0.59, "inbatch_neg_score": 0.5461, "inbatch_pos_score": 1.1064, "learning_rate": 4.833333333333333e-06, "loss": 3.7059, "norm_diff": 0.1317, "norm_loss": 0.0, "num_token_doc": 66.743, "num_token_overlap": 11.698, "num_token_query": 31.407, "num_token_union": 65.123, "num_word_context": 202.4672, "num_word_doc": 49.8219, "num_word_query": 23.3155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4557.8379, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5449, "query_norm": 1.3872, "queue_k_norm": 1.5205, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.407, "sent_len_1": 66.743, "sent_len_max_0": 127.4513, "sent_len_max_1": 187.1025, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6981, "doc_norm": 1.5196, "encoder_q-embeddings": 1291.0533, "encoder_q-layer.0": 890.6638, "encoder_q-layer.1": 960.3076, "encoder_q-layer.10": 1224.5996, "encoder_q-layer.11": 3254.344, "encoder_q-layer.2": 1140.8822, "encoder_q-layer.3": 1152.4441, "encoder_q-layer.4": 1203.8267, "encoder_q-layer.5": 1192.8184, "encoder_q-layer.6": 1172.312, "encoder_q-layer.7": 1113.5604, "encoder_q-layer.8": 1225.3478, "encoder_q-layer.9": 1158.8938, "epoch": 0.59, "inbatch_neg_score": 0.5467, "inbatch_pos_score": 1.127, "learning_rate": 4.777777777777778e-06, "loss": 3.6981, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.7432, "num_token_overlap": 11.7007, "num_token_query": 31.3296, "num_token_union": 65.0463, "num_word_context": 202.2455, "num_word_doc": 49.8124, "num_word_query": 23.2625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2246.8656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5449, "query_norm": 1.3996, "queue_k_norm": 1.5184, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3296, "sent_len_1": 66.7432, "sent_len_max_0": 127.4638, "sent_len_max_1": 188.4263, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7328, "doc_norm": 1.519, "encoder_q-embeddings": 1051.0155, "encoder_q-layer.0": 697.8179, "encoder_q-layer.1": 711.2232, "encoder_q-layer.10": 1334.8723, "encoder_q-layer.11": 3335.2258, "encoder_q-layer.2": 802.4506, "encoder_q-layer.3": 830.1415, "encoder_q-layer.4": 898.4391, "encoder_q-layer.5": 915.7013, "encoder_q-layer.6": 953.0264, "encoder_q-layer.7": 1057.3062, "encoder_q-layer.8": 1202.6957, "encoder_q-layer.9": 1135.575, "epoch": 0.6, "inbatch_neg_score": 0.5445, "inbatch_pos_score": 1.1113, "learning_rate": 4.722222222222222e-06, "loss": 3.7328, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 66.7809, "num_token_overlap": 11.6536, "num_token_query": 31.367, "num_token_union": 65.1217, "num_word_context": 202.2712, "num_word_doc": 49.8168, "num_word_query": 23.3, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2076.9769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5449, "query_norm": 1.3903, "queue_k_norm": 1.5204, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.367, "sent_len_1": 66.7809, "sent_len_max_0": 127.5238, "sent_len_max_1": 189.3063, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7031, "doc_norm": 1.5166, "encoder_q-embeddings": 1195.2469, "encoder_q-layer.0": 843.339, "encoder_q-layer.1": 988.9628, "encoder_q-layer.10": 1268.0981, "encoder_q-layer.11": 3304.7368, "encoder_q-layer.2": 1154.8615, "encoder_q-layer.3": 1195.5175, "encoder_q-layer.4": 1313.6224, "encoder_q-layer.5": 1312.9402, "encoder_q-layer.6": 1349.4375, "encoder_q-layer.7": 1229.8364, "encoder_q-layer.8": 1372.5905, "encoder_q-layer.9": 1167.2944, "epoch": 0.6, "inbatch_neg_score": 0.5449, "inbatch_pos_score": 1.123, "learning_rate": 4.666666666666667e-06, "loss": 3.7031, "norm_diff": 0.1237, "norm_loss": 0.0, "num_token_doc": 67.0682, "num_token_overlap": 11.6931, "num_token_query": 31.3539, "num_token_union": 65.2325, "num_word_context": 202.3922, "num_word_doc": 50.0558, "num_word_query": 23.2959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2306.8132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5469, "query_norm": 1.3929, "queue_k_norm": 1.5206, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3539, "sent_len_1": 67.0682, "sent_len_max_0": 127.4513, "sent_len_max_1": 190.3462, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7125, "doc_norm": 1.5176, "encoder_q-embeddings": 1364.5737, "encoder_q-layer.0": 936.0374, "encoder_q-layer.1": 964.2544, "encoder_q-layer.10": 1212.1477, "encoder_q-layer.11": 3167.2698, "encoder_q-layer.2": 1146.439, "encoder_q-layer.3": 1181.0923, "encoder_q-layer.4": 1305.3298, "encoder_q-layer.5": 1340.2416, "encoder_q-layer.6": 1374.6212, "encoder_q-layer.7": 1309.7961, "encoder_q-layer.8": 1337.8669, "encoder_q-layer.9": 1207.2539, "epoch": 0.6, "inbatch_neg_score": 0.5456, "inbatch_pos_score": 1.1172, "learning_rate": 4.611111111111111e-06, "loss": 3.7125, "norm_diff": 0.1295, "norm_loss": 0.0, "num_token_doc": 66.8165, "num_token_overlap": 11.6529, "num_token_query": 31.3085, "num_token_union": 65.112, "num_word_context": 202.6941, "num_word_doc": 49.9267, "num_word_query": 23.2554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2298.1958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5459, "query_norm": 1.388, "queue_k_norm": 1.5206, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3085, "sent_len_1": 66.8165, "sent_len_max_0": 127.3487, "sent_len_max_1": 189.495, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.6901, "doc_norm": 1.5179, "encoder_q-embeddings": 1524.218, "encoder_q-layer.0": 1088.7057, "encoder_q-layer.1": 1151.4927, "encoder_q-layer.10": 1279.6545, "encoder_q-layer.11": 3343.3162, "encoder_q-layer.2": 1332.4219, "encoder_q-layer.3": 1394.799, "encoder_q-layer.4": 1400.0035, "encoder_q-layer.5": 1284.0879, "encoder_q-layer.6": 1330.9846, "encoder_q-layer.7": 1331.3618, "encoder_q-layer.8": 1551.3807, "encoder_q-layer.9": 1302.6685, "epoch": 0.6, "inbatch_neg_score": 0.5468, "inbatch_pos_score": 1.1279, "learning_rate": 4.555555555555556e-06, "loss": 3.6901, "norm_diff": 0.1183, "norm_loss": 0.0, "num_token_doc": 66.779, "num_token_overlap": 11.7298, "num_token_query": 31.4606, "num_token_union": 65.1192, "num_word_context": 202.6212, "num_word_doc": 49.8465, "num_word_query": 23.3919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2489.7367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5474, "query_norm": 1.3996, "queue_k_norm": 1.5227, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4606, "sent_len_1": 66.779, "sent_len_max_0": 127.6038, "sent_len_max_1": 190.0325, "stdk": 0.048, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.7191, "doc_norm": 1.5185, "encoder_q-embeddings": 1126.8712, "encoder_q-layer.0": 725.0284, "encoder_q-layer.1": 782.1677, "encoder_q-layer.10": 1263.5718, "encoder_q-layer.11": 3338.5342, "encoder_q-layer.2": 875.6174, "encoder_q-layer.3": 921.2676, "encoder_q-layer.4": 991.0374, "encoder_q-layer.5": 1009.0585, "encoder_q-layer.6": 1093.2159, "encoder_q-layer.7": 1170.5751, "encoder_q-layer.8": 1296.1602, "encoder_q-layer.9": 1187.8462, "epoch": 0.6, "inbatch_neg_score": 0.5481, "inbatch_pos_score": 1.1084, "learning_rate": 4.5e-06, "loss": 3.7191, "norm_diff": 0.1281, "norm_loss": 0.0, "num_token_doc": 66.7, "num_token_overlap": 11.6621, "num_token_query": 31.4209, "num_token_union": 65.1769, "num_word_context": 202.4153, "num_word_doc": 49.7548, "num_word_query": 23.3518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2174.8217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5474, "query_norm": 1.3904, "queue_k_norm": 1.5219, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4209, "sent_len_1": 66.7, "sent_len_max_0": 127.3162, "sent_len_max_1": 187.6937, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.7024, "doc_norm": 1.5161, "encoder_q-embeddings": 949.6698, "encoder_q-layer.0": 657.4186, "encoder_q-layer.1": 690.3796, "encoder_q-layer.10": 1313.3699, "encoder_q-layer.11": 3156.4907, "encoder_q-layer.2": 772.6865, "encoder_q-layer.3": 822.6219, "encoder_q-layer.4": 880.4943, "encoder_q-layer.5": 885.8433, "encoder_q-layer.6": 913.0779, "encoder_q-layer.7": 1028.4193, "encoder_q-layer.8": 1220.5374, "encoder_q-layer.9": 1112.9954, "epoch": 0.6, "inbatch_neg_score": 0.5461, "inbatch_pos_score": 1.1289, "learning_rate": 4.444444444444445e-06, "loss": 3.7024, "norm_diff": 0.125, "norm_loss": 0.0, "num_token_doc": 66.6372, "num_token_overlap": 11.7467, "num_token_query": 31.5201, "num_token_union": 65.1123, "num_word_context": 202.4778, "num_word_doc": 49.7556, "num_word_query": 23.4475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.8075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5464, "query_norm": 1.3911, "queue_k_norm": 1.5195, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5201, "sent_len_1": 66.6372, "sent_len_max_0": 127.3225, "sent_len_max_1": 186.6138, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.6979, "doc_norm": 1.5135, "encoder_q-embeddings": 1508.0547, "encoder_q-layer.0": 1035.7371, "encoder_q-layer.1": 1210.6202, "encoder_q-layer.10": 1339.812, "encoder_q-layer.11": 3350.1084, "encoder_q-layer.2": 1368.9347, "encoder_q-layer.3": 1438.0214, "encoder_q-layer.4": 1544.8539, "encoder_q-layer.5": 1474.5767, "encoder_q-layer.6": 1466.2635, "encoder_q-layer.7": 1444.4988, "encoder_q-layer.8": 1442.7426, "encoder_q-layer.9": 1206.5901, "epoch": 0.6, "inbatch_neg_score": 0.5477, "inbatch_pos_score": 1.0996, "learning_rate": 4.388888888888889e-06, "loss": 3.6979, "norm_diff": 0.1243, "norm_loss": 0.0, "num_token_doc": 66.6612, "num_token_overlap": 11.6823, "num_token_query": 31.4734, "num_token_union": 65.1597, "num_word_context": 202.5365, "num_word_doc": 49.7489, "num_word_query": 23.3909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2512.7525, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5479, "query_norm": 1.3892, "queue_k_norm": 1.5202, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4734, "sent_len_1": 66.6612, "sent_len_max_0": 127.5913, "sent_len_max_1": 187.7175, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.6911, "doc_norm": 1.5228, "encoder_q-embeddings": 2142.436, "encoder_q-layer.0": 1520.3008, "encoder_q-layer.1": 1668.682, "encoder_q-layer.10": 1245.3422, "encoder_q-layer.11": 3315.3762, "encoder_q-layer.2": 1934.2019, "encoder_q-layer.3": 1902.7163, "encoder_q-layer.4": 1954.1256, "encoder_q-layer.5": 1990.55, "encoder_q-layer.6": 1745.6025, "encoder_q-layer.7": 1541.365, "encoder_q-layer.8": 1421.3574, "encoder_q-layer.9": 1178.3428, "epoch": 0.6, "inbatch_neg_score": 0.5478, "inbatch_pos_score": 1.0986, "learning_rate": 4.333333333333334e-06, "loss": 3.6911, "norm_diff": 0.1358, "norm_loss": 0.0, "num_token_doc": 66.6203, "num_token_overlap": 11.621, "num_token_query": 31.1714, "num_token_union": 64.9591, "num_word_context": 201.962, "num_word_doc": 49.7628, "num_word_query": 23.1369, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2910.3423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5479, "query_norm": 1.3871, "queue_k_norm": 1.5205, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.1714, "sent_len_1": 66.6203, "sent_len_max_0": 127.3187, "sent_len_max_1": 187.89, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7114, "doc_norm": 1.5298, "encoder_q-embeddings": 10707.5381, "encoder_q-layer.0": 8848.043, "encoder_q-layer.1": 10099.0166, "encoder_q-layer.10": 1407.8715, "encoder_q-layer.11": 3414.6345, "encoder_q-layer.2": 4402.6211, "encoder_q-layer.3": 1809.174, "encoder_q-layer.4": 1368.0374, "encoder_q-layer.5": 1324.9761, "encoder_q-layer.6": 1265.2799, "encoder_q-layer.7": 1322.1077, "encoder_q-layer.8": 1350.1786, "encoder_q-layer.9": 1223.4121, "epoch": 0.6, "inbatch_neg_score": 0.5485, "inbatch_pos_score": 1.124, "learning_rate": 4.277777777777778e-06, "loss": 3.7114, "norm_diff": 0.1334, "norm_loss": 0.0, "num_token_doc": 66.8597, "num_token_overlap": 11.6275, "num_token_query": 31.2437, "num_token_union": 65.1045, "num_word_context": 202.2167, "num_word_doc": 49.8565, "num_word_query": 23.2026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8753.6032, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5493, "query_norm": 1.3964, "queue_k_norm": 1.5221, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2437, "sent_len_1": 66.8597, "sent_len_max_0": 127.3625, "sent_len_max_1": 190.785, "stdk": 0.0484, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.721, "doc_norm": 1.5278, "encoder_q-embeddings": 1166.6038, "encoder_q-layer.0": 793.8801, "encoder_q-layer.1": 888.9732, "encoder_q-layer.10": 1317.8264, "encoder_q-layer.11": 3442.4307, "encoder_q-layer.2": 1003.4573, "encoder_q-layer.3": 1007.2087, "encoder_q-layer.4": 1023.8821, "encoder_q-layer.5": 1044.2944, "encoder_q-layer.6": 1076.3776, "encoder_q-layer.7": 1145.9514, "encoder_q-layer.8": 1295.8506, "encoder_q-layer.9": 1241.5607, "epoch": 0.6, "inbatch_neg_score": 0.5461, "inbatch_pos_score": 1.1152, "learning_rate": 4.222222222222223e-06, "loss": 3.721, "norm_diff": 0.1495, "norm_loss": 0.0, "num_token_doc": 66.7949, "num_token_overlap": 11.6477, "num_token_query": 31.2306, "num_token_union": 65.0489, "num_word_context": 202.3721, "num_word_doc": 49.8609, "num_word_query": 23.2034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2174.9237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5479, "query_norm": 1.3784, "queue_k_norm": 1.5213, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2306, "sent_len_1": 66.7949, "sent_len_max_0": 127.5075, "sent_len_max_1": 189.0163, "stdk": 0.0484, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7071, "doc_norm": 1.5174, "encoder_q-embeddings": 1564.7223, "encoder_q-layer.0": 1093.7686, "encoder_q-layer.1": 1186.7721, "encoder_q-layer.10": 1211.4124, "encoder_q-layer.11": 3266.5271, "encoder_q-layer.2": 1432.1356, "encoder_q-layer.3": 1436.332, "encoder_q-layer.4": 1462.6222, "encoder_q-layer.5": 1453.0748, "encoder_q-layer.6": 1447.5366, "encoder_q-layer.7": 1338.7114, "encoder_q-layer.8": 1361.9512, "encoder_q-layer.9": 1168.7607, "epoch": 0.6, "inbatch_neg_score": 0.5484, "inbatch_pos_score": 1.1152, "learning_rate": 4.166666666666667e-06, "loss": 3.7071, "norm_diff": 0.129, "norm_loss": 0.0, "num_token_doc": 66.8196, "num_token_overlap": 11.705, "num_token_query": 31.413, "num_token_union": 65.1157, "num_word_context": 202.4242, "num_word_doc": 49.8313, "num_word_query": 23.339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2491.6479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5488, "query_norm": 1.3884, "queue_k_norm": 1.5233, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.413, "sent_len_1": 66.8196, "sent_len_max_0": 127.4125, "sent_len_max_1": 188.9338, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7034, "doc_norm": 1.5217, "encoder_q-embeddings": 1162.5797, "encoder_q-layer.0": 777.1348, "encoder_q-layer.1": 806.0856, "encoder_q-layer.10": 1391.9766, "encoder_q-layer.11": 3243.5798, "encoder_q-layer.2": 919.5978, "encoder_q-layer.3": 993.8148, "encoder_q-layer.4": 1058.5083, "encoder_q-layer.5": 1064.9088, "encoder_q-layer.6": 1138.3584, "encoder_q-layer.7": 1211.9778, "encoder_q-layer.8": 1330.1698, "encoder_q-layer.9": 1201.3295, "epoch": 0.6, "inbatch_neg_score": 0.548, "inbatch_pos_score": 1.1123, "learning_rate": 4.111111111111112e-06, "loss": 3.7034, "norm_diff": 0.1423, "norm_loss": 0.0, "num_token_doc": 66.7626, "num_token_overlap": 11.6753, "num_token_query": 31.4269, "num_token_union": 65.1244, "num_word_context": 201.9814, "num_word_doc": 49.8136, "num_word_query": 23.3506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2179.3186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5493, "query_norm": 1.3794, "queue_k_norm": 1.524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4269, "sent_len_1": 66.7626, "sent_len_max_0": 127.5512, "sent_len_max_1": 189.2912, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.7092, "doc_norm": 1.5237, "encoder_q-embeddings": 1455.3086, "encoder_q-layer.0": 1035.0793, "encoder_q-layer.1": 1154.1123, "encoder_q-layer.10": 1337.618, "encoder_q-layer.11": 3623.1809, "encoder_q-layer.2": 1364.2809, "encoder_q-layer.3": 1492.5629, "encoder_q-layer.4": 1587.8994, "encoder_q-layer.5": 1679.9241, "encoder_q-layer.6": 1870.3898, "encoder_q-layer.7": 1686.6321, "encoder_q-layer.8": 1633.2449, "encoder_q-layer.9": 1234.7878, "epoch": 0.6, "inbatch_neg_score": 0.5497, "inbatch_pos_score": 1.0908, "learning_rate": 4.055555555555556e-06, "loss": 3.7092, "norm_diff": 0.1438, "norm_loss": 0.0, "num_token_doc": 66.7276, "num_token_overlap": 11.6709, "num_token_query": 31.4392, "num_token_union": 65.1349, "num_word_context": 202.1124, "num_word_doc": 49.7967, "num_word_query": 23.3385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2718.3742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5508, "query_norm": 1.3799, "queue_k_norm": 1.5222, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4392, "sent_len_1": 66.7276, "sent_len_max_0": 127.5888, "sent_len_max_1": 188.5137, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7024, "doc_norm": 1.5237, "encoder_q-embeddings": 1092.4982, "encoder_q-layer.0": 749.6077, "encoder_q-layer.1": 790.81, "encoder_q-layer.10": 1262.4371, "encoder_q-layer.11": 3431.5161, "encoder_q-layer.2": 857.1161, "encoder_q-layer.3": 918.9602, "encoder_q-layer.4": 974.5358, "encoder_q-layer.5": 991.5051, "encoder_q-layer.6": 1132.0627, "encoder_q-layer.7": 1236.2689, "encoder_q-layer.8": 1400.2295, "encoder_q-layer.9": 1225.611, "epoch": 0.6, "inbatch_neg_score": 0.5493, "inbatch_pos_score": 1.123, "learning_rate": 4.000000000000001e-06, "loss": 3.7024, "norm_diff": 0.1293, "norm_loss": 0.0, "num_token_doc": 66.7661, "num_token_overlap": 11.6947, "num_token_query": 31.4479, "num_token_union": 65.1331, "num_word_context": 202.4086, "num_word_doc": 49.8568, "num_word_query": 23.3478, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2214.7185, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5488, "query_norm": 1.3944, "queue_k_norm": 1.5237, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4479, "sent_len_1": 66.7661, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.4288, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.7128, "doc_norm": 1.5262, "encoder_q-embeddings": 1282.8264, "encoder_q-layer.0": 854.8685, "encoder_q-layer.1": 899.8306, "encoder_q-layer.10": 1235.9501, "encoder_q-layer.11": 3105.7283, "encoder_q-layer.2": 997.5948, "encoder_q-layer.3": 1030.276, "encoder_q-layer.4": 1086.2578, "encoder_q-layer.5": 1113.4395, "encoder_q-layer.6": 1105.4655, "encoder_q-layer.7": 1186.0416, "encoder_q-layer.8": 1194.7263, "encoder_q-layer.9": 1096.8342, "epoch": 0.6, "inbatch_neg_score": 0.5489, "inbatch_pos_score": 1.1377, "learning_rate": 3.944444444444445e-06, "loss": 3.7128, "norm_diff": 0.1274, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 11.6397, "num_token_query": 31.3727, "num_token_union": 65.2252, "num_word_context": 202.5882, "num_word_doc": 49.9369, "num_word_query": 23.3087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2122.1663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5488, "query_norm": 1.3988, "queue_k_norm": 1.5232, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3727, "sent_len_1": 66.8871, "sent_len_max_0": 127.5325, "sent_len_max_1": 188.7975, "stdk": 0.0483, "stdq": 0.0428, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.6831, "doc_norm": 1.5248, "encoder_q-embeddings": 1055.734, "encoder_q-layer.0": 710.0508, "encoder_q-layer.1": 756.3289, "encoder_q-layer.10": 1270.2056, "encoder_q-layer.11": 3406.0208, "encoder_q-layer.2": 854.1754, "encoder_q-layer.3": 910.0098, "encoder_q-layer.4": 947.114, "encoder_q-layer.5": 997.6273, "encoder_q-layer.6": 1064.6292, "encoder_q-layer.7": 1114.6718, "encoder_q-layer.8": 1293.3055, "encoder_q-layer.9": 1203.6554, "epoch": 0.61, "inbatch_neg_score": 0.5488, "inbatch_pos_score": 1.1338, "learning_rate": 3.888888888888889e-06, "loss": 3.6831, "norm_diff": 0.1339, "norm_loss": 0.0, "num_token_doc": 66.915, "num_token_overlap": 11.7184, "num_token_query": 31.5268, "num_token_union": 65.2432, "num_word_context": 202.596, "num_word_doc": 49.9251, "num_word_query": 23.4227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2189.9536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5498, "query_norm": 1.3909, "queue_k_norm": 1.5239, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5268, "sent_len_1": 66.915, "sent_len_max_0": 127.4988, "sent_len_max_1": 189.15, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.7125, "doc_norm": 1.5129, "encoder_q-embeddings": 1171.9517, "encoder_q-layer.0": 785.8341, "encoder_q-layer.1": 871.0766, "encoder_q-layer.10": 1363.7715, "encoder_q-layer.11": 3417.4636, "encoder_q-layer.2": 954.5459, "encoder_q-layer.3": 968.3933, "encoder_q-layer.4": 1045.0428, "encoder_q-layer.5": 1042.614, "encoder_q-layer.6": 1025.6177, "encoder_q-layer.7": 1135.7698, "encoder_q-layer.8": 1298.5879, "encoder_q-layer.9": 1198.2145, "epoch": 0.61, "inbatch_neg_score": 0.5491, "inbatch_pos_score": 1.0928, "learning_rate": 3.833333333333334e-06, "loss": 3.7125, "norm_diff": 0.1198, "norm_loss": 0.0, "num_token_doc": 66.8013, "num_token_overlap": 11.6402, "num_token_query": 31.4054, "num_token_union": 65.1956, "num_word_context": 202.2248, "num_word_doc": 49.8442, "num_word_query": 23.3501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2174.4072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5493, "query_norm": 1.3932, "queue_k_norm": 1.5247, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4054, "sent_len_1": 66.8013, "sent_len_max_0": 127.5263, "sent_len_max_1": 191.025, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.7005, "doc_norm": 1.5282, "encoder_q-embeddings": 1176.0154, "encoder_q-layer.0": 821.4213, "encoder_q-layer.1": 891.6918, "encoder_q-layer.10": 1274.5712, "encoder_q-layer.11": 3318.9053, "encoder_q-layer.2": 1021.2568, "encoder_q-layer.3": 1074.3879, "encoder_q-layer.4": 1069.9282, "encoder_q-layer.5": 1085.9443, "encoder_q-layer.6": 1135.163, "encoder_q-layer.7": 1224.2155, "encoder_q-layer.8": 1302.0042, "encoder_q-layer.9": 1192.089, "epoch": 0.61, "inbatch_neg_score": 0.5498, "inbatch_pos_score": 1.1064, "learning_rate": 3.777777777777778e-06, "loss": 3.7005, "norm_diff": 0.1512, "norm_loss": 0.0, "num_token_doc": 66.7572, "num_token_overlap": 11.6611, "num_token_query": 31.4172, "num_token_union": 65.1893, "num_word_context": 202.4502, "num_word_doc": 49.9059, "num_word_query": 23.3436, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2226.9138, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5493, "query_norm": 1.377, "queue_k_norm": 1.5245, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4172, "sent_len_1": 66.7572, "sent_len_max_0": 127.545, "sent_len_max_1": 187.8762, "stdk": 0.0483, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.7148, "doc_norm": 1.5215, "encoder_q-embeddings": 2737.4048, "encoder_q-layer.0": 1888.106, "encoder_q-layer.1": 2070.636, "encoder_q-layer.10": 1413.6174, "encoder_q-layer.11": 3363.7576, "encoder_q-layer.2": 2678.8372, "encoder_q-layer.3": 2768.7878, "encoder_q-layer.4": 3157.2144, "encoder_q-layer.5": 3238.55, "encoder_q-layer.6": 2857.5852, "encoder_q-layer.7": 2503.791, "encoder_q-layer.8": 2064.5359, "encoder_q-layer.9": 1423.41, "epoch": 0.61, "inbatch_neg_score": 0.5503, "inbatch_pos_score": 1.1094, "learning_rate": 3.722222222222222e-06, "loss": 3.7148, "norm_diff": 0.1288, "norm_loss": 0.0, "num_token_doc": 66.5808, "num_token_overlap": 11.6845, "num_token_query": 31.539, "num_token_union": 65.0809, "num_word_context": 202.1243, "num_word_doc": 49.6873, "num_word_query": 23.4354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3894.3523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5503, "query_norm": 1.3927, "queue_k_norm": 1.5251, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.539, "sent_len_1": 66.5808, "sent_len_max_0": 127.7313, "sent_len_max_1": 188.5225, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.7082, "doc_norm": 1.5216, "encoder_q-embeddings": 2561.6372, "encoder_q-layer.0": 1846.7427, "encoder_q-layer.1": 1932.833, "encoder_q-layer.10": 2733.1694, "encoder_q-layer.11": 6562.4624, "encoder_q-layer.2": 2192.9033, "encoder_q-layer.3": 2213.3337, "encoder_q-layer.4": 2327.5842, "encoder_q-layer.5": 2449.4863, "encoder_q-layer.6": 2573.2014, "encoder_q-layer.7": 2583.405, "encoder_q-layer.8": 2636.0674, "encoder_q-layer.9": 2369.9407, "epoch": 0.61, "inbatch_neg_score": 0.5493, "inbatch_pos_score": 1.127, "learning_rate": 3.666666666666667e-06, "loss": 3.7082, "norm_diff": 0.125, "norm_loss": 0.0, "num_token_doc": 66.6663, "num_token_overlap": 11.6408, "num_token_query": 31.4052, "num_token_union": 65.1335, "num_word_context": 202.4079, "num_word_doc": 49.7351, "num_word_query": 23.3164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4564.0418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5493, "query_norm": 1.3965, "queue_k_norm": 1.524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4052, "sent_len_1": 66.6663, "sent_len_max_0": 127.5738, "sent_len_max_1": 189.9575, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.6974, "doc_norm": 1.5277, "encoder_q-embeddings": 3539.1504, "encoder_q-layer.0": 2456.8306, "encoder_q-layer.1": 2774.4849, "encoder_q-layer.10": 2713.5166, "encoder_q-layer.11": 6575.46, "encoder_q-layer.2": 3231.3364, "encoder_q-layer.3": 3539.5391, "encoder_q-layer.4": 3816.5161, "encoder_q-layer.5": 3929.251, "encoder_q-layer.6": 3781.6157, "encoder_q-layer.7": 3337.0955, "encoder_q-layer.8": 3400.8762, "encoder_q-layer.9": 2560.8633, "epoch": 0.61, "inbatch_neg_score": 0.55, "inbatch_pos_score": 1.1309, "learning_rate": 3.611111111111111e-06, "loss": 3.6974, "norm_diff": 0.1425, "norm_loss": 0.0, "num_token_doc": 67.0224, "num_token_overlap": 11.732, "num_token_query": 31.5593, "num_token_union": 65.3648, "num_word_context": 202.5337, "num_word_doc": 50.0151, "num_word_query": 23.4785, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5593.3381, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5493, "query_norm": 1.3852, "queue_k_norm": 1.5242, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.5593, "sent_len_1": 67.0224, "sent_len_max_0": 127.5425, "sent_len_max_1": 189.6575, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.694, "doc_norm": 1.5158, "encoder_q-embeddings": 3063.3633, "encoder_q-layer.0": 2191.1265, "encoder_q-layer.1": 2479.3486, "encoder_q-layer.10": 2632.54, "encoder_q-layer.11": 6616.5913, "encoder_q-layer.2": 2977.4441, "encoder_q-layer.3": 3131.0967, "encoder_q-layer.4": 3407.1018, "encoder_q-layer.5": 3769.1724, "encoder_q-layer.6": 3740.8162, "encoder_q-layer.7": 3240.5945, "encoder_q-layer.8": 3167.6067, "encoder_q-layer.9": 2713.1526, "epoch": 0.61, "inbatch_neg_score": 0.5517, "inbatch_pos_score": 1.1211, "learning_rate": 3.555555555555556e-06, "loss": 3.694, "norm_diff": 0.1249, "norm_loss": 0.0, "num_token_doc": 66.838, "num_token_overlap": 11.679, "num_token_query": 31.3693, "num_token_union": 65.1197, "num_word_context": 202.145, "num_word_doc": 49.8695, "num_word_query": 23.293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5302.5618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5518, "query_norm": 1.3909, "queue_k_norm": 1.5224, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3693, "sent_len_1": 66.838, "sent_len_max_0": 127.5512, "sent_len_max_1": 188.825, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.7083, "doc_norm": 1.5282, "encoder_q-embeddings": 2823.572, "encoder_q-layer.0": 1863.0308, "encoder_q-layer.1": 2104.0281, "encoder_q-layer.10": 2557.8301, "encoder_q-layer.11": 6531.5747, "encoder_q-layer.2": 2367.7756, "encoder_q-layer.3": 2536.583, "encoder_q-layer.4": 2697.6069, "encoder_q-layer.5": 2862.9146, "encoder_q-layer.6": 2763.3074, "encoder_q-layer.7": 2856.0786, "encoder_q-layer.8": 2764.3779, "encoder_q-layer.9": 2308.4304, "epoch": 0.61, "inbatch_neg_score": 0.5493, "inbatch_pos_score": 1.1396, "learning_rate": 3.5000000000000004e-06, "loss": 3.7083, "norm_diff": 0.1378, "norm_loss": 0.0, "num_token_doc": 66.8768, "num_token_overlap": 11.6381, "num_token_query": 31.354, "num_token_union": 65.2219, "num_word_context": 202.4686, "num_word_doc": 49.936, "num_word_query": 23.2978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4746.3219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5503, "query_norm": 1.3903, "queue_k_norm": 1.5265, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.354, "sent_len_1": 66.8768, "sent_len_max_0": 127.4562, "sent_len_max_1": 185.6687, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7044, "doc_norm": 1.5223, "encoder_q-embeddings": 2405.4517, "encoder_q-layer.0": 1681.1107, "encoder_q-layer.1": 1795.2517, "encoder_q-layer.10": 2562.0691, "encoder_q-layer.11": 6681.0288, "encoder_q-layer.2": 2102.0613, "encoder_q-layer.3": 2207.6372, "encoder_q-layer.4": 2290.5132, "encoder_q-layer.5": 2415.2737, "encoder_q-layer.6": 2342.425, "encoder_q-layer.7": 2421.7041, "encoder_q-layer.8": 2667.1248, "encoder_q-layer.9": 2387.1909, "epoch": 0.61, "inbatch_neg_score": 0.5518, "inbatch_pos_score": 1.124, "learning_rate": 3.4444444444444444e-06, "loss": 3.7044, "norm_diff": 0.1359, "norm_loss": 0.0, "num_token_doc": 66.7777, "num_token_overlap": 11.6971, "num_token_query": 31.3892, "num_token_union": 65.0853, "num_word_context": 202.5687, "num_word_doc": 49.8427, "num_word_query": 23.3079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4516.5343, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5518, "query_norm": 1.3864, "queue_k_norm": 1.525, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3892, "sent_len_1": 66.7777, "sent_len_max_0": 127.6325, "sent_len_max_1": 188.9038, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.712, "doc_norm": 1.5299, "encoder_q-embeddings": 2398.3054, "encoder_q-layer.0": 1585.0791, "encoder_q-layer.1": 1744.0801, "encoder_q-layer.10": 2760.8696, "encoder_q-layer.11": 6852.6191, "encoder_q-layer.2": 1983.8215, "encoder_q-layer.3": 2108.1055, "encoder_q-layer.4": 2222.2302, "encoder_q-layer.5": 2372.4473, "encoder_q-layer.6": 2607.4011, "encoder_q-layer.7": 2674.2307, "encoder_q-layer.8": 2856.8628, "encoder_q-layer.9": 2554.9197, "epoch": 0.61, "inbatch_neg_score": 0.5525, "inbatch_pos_score": 1.125, "learning_rate": 3.3888888888888893e-06, "loss": 3.712, "norm_diff": 0.1417, "norm_loss": 0.0, "num_token_doc": 66.9168, "num_token_overlap": 11.6862, "num_token_query": 31.4231, "num_token_union": 65.256, "num_word_context": 202.1537, "num_word_doc": 49.9401, "num_word_query": 23.3213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4589.6118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5527, "query_norm": 1.3882, "queue_k_norm": 1.5243, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4231, "sent_len_1": 66.9168, "sent_len_max_0": 127.56, "sent_len_max_1": 191.7937, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.6979, "doc_norm": 1.5222, "encoder_q-embeddings": 2703.7954, "encoder_q-layer.0": 1818.2247, "encoder_q-layer.1": 1960.3092, "encoder_q-layer.10": 2757.824, "encoder_q-layer.11": 6663.1733, "encoder_q-layer.2": 2343.2847, "encoder_q-layer.3": 2418.2617, "encoder_q-layer.4": 2605.9893, "encoder_q-layer.5": 2616.1555, "encoder_q-layer.6": 2613.9143, "encoder_q-layer.7": 2417.6538, "encoder_q-layer.8": 2608.1616, "encoder_q-layer.9": 2411.6375, "epoch": 0.61, "inbatch_neg_score": 0.5528, "inbatch_pos_score": 1.1104, "learning_rate": 3.3333333333333333e-06, "loss": 3.6979, "norm_diff": 0.136, "norm_loss": 0.0, "num_token_doc": 67.0112, "num_token_overlap": 11.745, "num_token_query": 31.5165, "num_token_union": 65.2975, "num_word_context": 202.6232, "num_word_doc": 49.9871, "num_word_query": 23.4006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4659.5647, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5527, "query_norm": 1.3862, "queue_k_norm": 1.5248, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5165, "sent_len_1": 67.0112, "sent_len_max_0": 127.6012, "sent_len_max_1": 192.1238, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.722, "doc_norm": 1.5257, "encoder_q-embeddings": 5224.502, "encoder_q-layer.0": 3430.2322, "encoder_q-layer.1": 3914.2759, "encoder_q-layer.10": 2602.554, "encoder_q-layer.11": 6578.6494, "encoder_q-layer.2": 4307.0815, "encoder_q-layer.3": 4735.0332, "encoder_q-layer.4": 4940.4443, "encoder_q-layer.5": 5046.2563, "encoder_q-layer.6": 5918.7095, "encoder_q-layer.7": 4925.3911, "encoder_q-layer.8": 3989.697, "encoder_q-layer.9": 2562.8633, "epoch": 0.61, "inbatch_neg_score": 0.5518, "inbatch_pos_score": 1.123, "learning_rate": 3.277777777777778e-06, "loss": 3.722, "norm_diff": 0.1296, "norm_loss": 0.0, "num_token_doc": 66.7055, "num_token_overlap": 11.6758, "num_token_query": 31.4642, "num_token_union": 65.1381, "num_word_context": 201.9912, "num_word_doc": 49.7665, "num_word_query": 23.3575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7004.1202, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5508, "query_norm": 1.3962, "queue_k_norm": 1.5241, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4642, "sent_len_1": 66.7055, "sent_len_max_0": 127.3225, "sent_len_max_1": 188.4575, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.7086, "doc_norm": 1.5253, "encoder_q-embeddings": 2389.3054, "encoder_q-layer.0": 1649.2266, "encoder_q-layer.1": 1721.5796, "encoder_q-layer.10": 2988.3562, "encoder_q-layer.11": 6613.3291, "encoder_q-layer.2": 1985.7185, "encoder_q-layer.3": 2161.0408, "encoder_q-layer.4": 2259.8794, "encoder_q-layer.5": 2222.7817, "encoder_q-layer.6": 2553.7219, "encoder_q-layer.7": 2718.9099, "encoder_q-layer.8": 2854.0681, "encoder_q-layer.9": 2516.2949, "epoch": 0.61, "inbatch_neg_score": 0.551, "inbatch_pos_score": 1.127, "learning_rate": 3.2222222222222222e-06, "loss": 3.7086, "norm_diff": 0.1302, "norm_loss": 0.0, "num_token_doc": 66.7897, "num_token_overlap": 11.6923, "num_token_query": 31.3948, "num_token_union": 65.1269, "num_word_context": 202.2823, "num_word_doc": 49.7982, "num_word_query": 23.3113, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4496.4868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5513, "query_norm": 1.3951, "queue_k_norm": 1.5237, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3948, "sent_len_1": 66.7897, "sent_len_max_0": 127.4587, "sent_len_max_1": 189.4512, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.7136, "doc_norm": 1.5219, "encoder_q-embeddings": 2275.1633, "encoder_q-layer.0": 1532.1788, "encoder_q-layer.1": 1655.4973, "encoder_q-layer.10": 2609.0315, "encoder_q-layer.11": 6912.0898, "encoder_q-layer.2": 1809.6693, "encoder_q-layer.3": 1897.8331, "encoder_q-layer.4": 2056.3235, "encoder_q-layer.5": 2081.1755, "encoder_q-layer.6": 2109.8816, "encoder_q-layer.7": 2225.2073, "encoder_q-layer.8": 2467.3843, "encoder_q-layer.9": 2432.2207, "epoch": 0.61, "inbatch_neg_score": 0.5518, "inbatch_pos_score": 1.124, "learning_rate": 3.166666666666667e-06, "loss": 3.7136, "norm_diff": 0.1415, "norm_loss": 0.0, "num_token_doc": 66.5393, "num_token_overlap": 11.6533, "num_token_query": 31.4518, "num_token_union": 65.0495, "num_word_context": 201.992, "num_word_doc": 49.647, "num_word_query": 23.3444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4386.5024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5522, "query_norm": 1.3803, "queue_k_norm": 1.525, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4518, "sent_len_1": 66.5393, "sent_len_max_0": 127.5687, "sent_len_max_1": 188.2388, "stdk": 0.048, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.6804, "doc_norm": 1.526, "encoder_q-embeddings": 2026.678, "encoder_q-layer.0": 1356.1528, "encoder_q-layer.1": 1365.5914, "encoder_q-layer.10": 3198.3923, "encoder_q-layer.11": 6920.0967, "encoder_q-layer.2": 1538.9138, "encoder_q-layer.3": 1570.3776, "encoder_q-layer.4": 1655.9641, "encoder_q-layer.5": 1713.1981, "encoder_q-layer.6": 1893.1171, "encoder_q-layer.7": 2188.5125, "encoder_q-layer.8": 2594.8796, "encoder_q-layer.9": 2551.427, "epoch": 0.61, "inbatch_neg_score": 0.5511, "inbatch_pos_score": 1.1416, "learning_rate": 3.111111111111111e-06, "loss": 3.6804, "norm_diff": 0.1341, "norm_loss": 0.0, "num_token_doc": 66.606, "num_token_overlap": 11.7204, "num_token_query": 31.4425, "num_token_union": 64.9934, "num_word_context": 202.2003, "num_word_doc": 49.6804, "num_word_query": 23.3542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4302.3417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5518, "query_norm": 1.3919, "queue_k_norm": 1.5245, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4425, "sent_len_1": 66.606, "sent_len_max_0": 127.3525, "sent_len_max_1": 190.3038, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.6982, "doc_norm": 1.5299, "encoder_q-embeddings": 3078.615, "encoder_q-layer.0": 2127.1084, "encoder_q-layer.1": 2458.9478, "encoder_q-layer.10": 2587.5459, "encoder_q-layer.11": 6385.7026, "encoder_q-layer.2": 2700.6929, "encoder_q-layer.3": 2567.406, "encoder_q-layer.4": 2681.0466, "encoder_q-layer.5": 2567.3271, "encoder_q-layer.6": 2356.637, "encoder_q-layer.7": 2273.3735, "encoder_q-layer.8": 2635.814, "encoder_q-layer.9": 2390.6997, "epoch": 0.62, "inbatch_neg_score": 0.5534, "inbatch_pos_score": 1.1387, "learning_rate": 3.0555555555555556e-06, "loss": 3.6982, "norm_diff": 0.137, "norm_loss": 0.0, "num_token_doc": 66.8583, "num_token_overlap": 11.654, "num_token_query": 31.3267, "num_token_union": 65.1342, "num_word_context": 202.1476, "num_word_doc": 49.8815, "num_word_query": 23.2624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4657.4397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5532, "query_norm": 1.3928, "queue_k_norm": 1.5237, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3267, "sent_len_1": 66.8583, "sent_len_max_0": 127.4437, "sent_len_max_1": 189.7388, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7135, "doc_norm": 1.5283, "encoder_q-embeddings": 3915.5178, "encoder_q-layer.0": 2718.8669, "encoder_q-layer.1": 3085.3118, "encoder_q-layer.10": 2706.0901, "encoder_q-layer.11": 6849.3188, "encoder_q-layer.2": 3522.9065, "encoder_q-layer.3": 3717.9128, "encoder_q-layer.4": 4069.6023, "encoder_q-layer.5": 3893.2278, "encoder_q-layer.6": 3454.2534, "encoder_q-layer.7": 3240.2402, "encoder_q-layer.8": 3100.886, "encoder_q-layer.9": 2447.2048, "epoch": 0.62, "inbatch_neg_score": 0.5527, "inbatch_pos_score": 1.1377, "learning_rate": 3e-06, "loss": 3.7135, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 66.6748, "num_token_overlap": 11.6495, "num_token_query": 31.3811, "num_token_union": 65.0905, "num_word_context": 201.8718, "num_word_doc": 49.7432, "num_word_query": 23.3326, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5787.1633, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5537, "query_norm": 1.3996, "queue_k_norm": 1.5251, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3811, "sent_len_1": 66.6748, "sent_len_max_0": 127.4587, "sent_len_max_1": 188.2587, "stdk": 0.0483, "stdq": 0.0428, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.7218, "doc_norm": 1.5246, "encoder_q-embeddings": 2400.4158, "encoder_q-layer.0": 1617.6815, "encoder_q-layer.1": 1830.0144, "encoder_q-layer.10": 2809.0742, "encoder_q-layer.11": 6810.3052, "encoder_q-layer.2": 2083.5403, "encoder_q-layer.3": 2161.4165, "encoder_q-layer.4": 2550.2957, "encoder_q-layer.5": 2518.1892, "encoder_q-layer.6": 2604.2812, "encoder_q-layer.7": 2712.2493, "encoder_q-layer.8": 3048.7686, "encoder_q-layer.9": 2778.8262, "epoch": 0.62, "inbatch_neg_score": 0.5527, "inbatch_pos_score": 1.1191, "learning_rate": 2.9444444444444445e-06, "loss": 3.7218, "norm_diff": 0.1421, "norm_loss": 0.0, "num_token_doc": 66.7355, "num_token_overlap": 11.6206, "num_token_query": 31.3538, "num_token_union": 65.1488, "num_word_context": 202.166, "num_word_doc": 49.8191, "num_word_query": 23.2957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4660.7166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.3825, "queue_k_norm": 1.524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3538, "sent_len_1": 66.7355, "sent_len_max_0": 127.5687, "sent_len_max_1": 188.5813, "stdk": 0.0481, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7093, "doc_norm": 1.5223, "encoder_q-embeddings": 5125.8071, "encoder_q-layer.0": 3396.1138, "encoder_q-layer.1": 4119.292, "encoder_q-layer.10": 2385.0681, "encoder_q-layer.11": 6334.4795, "encoder_q-layer.2": 4792.4873, "encoder_q-layer.3": 4976.8149, "encoder_q-layer.4": 5212.8345, "encoder_q-layer.5": 6054.2144, "encoder_q-layer.6": 5643.478, "encoder_q-layer.7": 4816.2319, "encoder_q-layer.8": 3817.7786, "encoder_q-layer.9": 2413.1326, "epoch": 0.62, "inbatch_neg_score": 0.5564, "inbatch_pos_score": 1.1094, "learning_rate": 2.888888888888889e-06, "loss": 3.7093, "norm_diff": 0.1363, "norm_loss": 0.0, "num_token_doc": 66.6822, "num_token_overlap": 11.6803, "num_token_query": 31.4065, "num_token_union": 65.0993, "num_word_context": 202.0555, "num_word_doc": 49.7375, "num_word_query": 23.3123, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7103.0183, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5552, "query_norm": 1.386, "queue_k_norm": 1.5245, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4065, "sent_len_1": 66.6822, "sent_len_max_0": 127.47, "sent_len_max_1": 189.265, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6969, "doc_norm": 1.531, "encoder_q-embeddings": 2183.4075, "encoder_q-layer.0": 1503.8538, "encoder_q-layer.1": 1557.3468, "encoder_q-layer.10": 2528.9658, "encoder_q-layer.11": 6561.082, "encoder_q-layer.2": 1817.6605, "encoder_q-layer.3": 1805.646, "encoder_q-layer.4": 1843.1289, "encoder_q-layer.5": 1835.4215, "encoder_q-layer.6": 1987.569, "encoder_q-layer.7": 2163.8557, "encoder_q-layer.8": 2487.7112, "encoder_q-layer.9": 2359.9512, "epoch": 0.62, "inbatch_neg_score": 0.5543, "inbatch_pos_score": 1.1367, "learning_rate": 2.8333333333333335e-06, "loss": 3.6969, "norm_diff": 0.1347, "norm_loss": 0.0, "num_token_doc": 66.8396, "num_token_overlap": 11.6819, "num_token_query": 31.3865, "num_token_union": 65.1678, "num_word_context": 202.3137, "num_word_doc": 49.8429, "num_word_query": 23.3201, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4187.4645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.3963, "queue_k_norm": 1.5247, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3865, "sent_len_1": 66.8396, "sent_len_max_0": 127.5487, "sent_len_max_1": 190.9938, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7239, "doc_norm": 1.5286, "encoder_q-embeddings": 4389.9883, "encoder_q-layer.0": 3147.0771, "encoder_q-layer.1": 3515.8433, "encoder_q-layer.10": 2884.7231, "encoder_q-layer.11": 6807.5806, "encoder_q-layer.2": 3814.8726, "encoder_q-layer.3": 4261.9531, "encoder_q-layer.4": 4245.0874, "encoder_q-layer.5": 4229.6748, "encoder_q-layer.6": 4608.2246, "encoder_q-layer.7": 5167.1353, "encoder_q-layer.8": 3949.2595, "encoder_q-layer.9": 2679.0857, "epoch": 0.62, "inbatch_neg_score": 0.5533, "inbatch_pos_score": 1.1289, "learning_rate": 2.777777777777778e-06, "loss": 3.7239, "norm_diff": 0.1435, "norm_loss": 0.0, "num_token_doc": 66.6847, "num_token_overlap": 11.6603, "num_token_query": 31.3613, "num_token_union": 65.0827, "num_word_context": 202.0793, "num_word_doc": 49.765, "num_word_query": 23.2808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6566.5949, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5527, "query_norm": 1.3851, "queue_k_norm": 1.5246, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3613, "sent_len_1": 66.6847, "sent_len_max_0": 127.4537, "sent_len_max_1": 188.3363, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.7062, "doc_norm": 1.5243, "encoder_q-embeddings": 2912.2979, "encoder_q-layer.0": 1985.5513, "encoder_q-layer.1": 2072.6885, "encoder_q-layer.10": 2577.8713, "encoder_q-layer.11": 6702.7422, "encoder_q-layer.2": 2313.2512, "encoder_q-layer.3": 2425.1602, "encoder_q-layer.4": 2532.7988, "encoder_q-layer.5": 2365.7542, "encoder_q-layer.6": 2335.5764, "encoder_q-layer.7": 2553.5186, "encoder_q-layer.8": 2875.8462, "encoder_q-layer.9": 2496.5598, "epoch": 0.62, "inbatch_neg_score": 0.5554, "inbatch_pos_score": 1.1328, "learning_rate": 2.7222222222222224e-06, "loss": 3.7062, "norm_diff": 0.1314, "norm_loss": 0.0, "num_token_doc": 66.5512, "num_token_overlap": 11.6184, "num_token_query": 31.2825, "num_token_union": 64.9884, "num_word_context": 202.2863, "num_word_doc": 49.6389, "num_word_query": 23.2063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4687.2227, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5547, "query_norm": 1.3929, "queue_k_norm": 1.5245, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2825, "sent_len_1": 66.5512, "sent_len_max_0": 127.3888, "sent_len_max_1": 189.2488, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7188, "doc_norm": 1.5293, "encoder_q-embeddings": 2332.8372, "encoder_q-layer.0": 1568.6105, "encoder_q-layer.1": 1763.9802, "encoder_q-layer.10": 2425.8657, "encoder_q-layer.11": 6599.3906, "encoder_q-layer.2": 1969.6307, "encoder_q-layer.3": 2056.063, "encoder_q-layer.4": 2131.406, "encoder_q-layer.5": 2131.7415, "encoder_q-layer.6": 2127.6235, "encoder_q-layer.7": 2170.3088, "encoder_q-layer.8": 2426.481, "encoder_q-layer.9": 2293.5632, "epoch": 0.62, "inbatch_neg_score": 0.5541, "inbatch_pos_score": 1.1221, "learning_rate": 2.666666666666667e-06, "loss": 3.7188, "norm_diff": 0.139, "norm_loss": 0.0, "num_token_doc": 66.8965, "num_token_overlap": 11.6532, "num_token_query": 31.3003, "num_token_union": 65.1577, "num_word_context": 202.5868, "num_word_doc": 49.982, "num_word_query": 23.2406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4351.7029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.3903, "queue_k_norm": 1.527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3003, "sent_len_1": 66.8965, "sent_len_max_0": 127.4025, "sent_len_max_1": 187.595, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7233, "doc_norm": 1.5347, "encoder_q-embeddings": 2481.5742, "encoder_q-layer.0": 1708.3872, "encoder_q-layer.1": 1892.6261, "encoder_q-layer.10": 2457.1179, "encoder_q-layer.11": 6340.5562, "encoder_q-layer.2": 2149.813, "encoder_q-layer.3": 2238.8218, "encoder_q-layer.4": 2404.8743, "encoder_q-layer.5": 2398.6191, "encoder_q-layer.6": 2331.4988, "encoder_q-layer.7": 2291.0054, "encoder_q-layer.8": 2486.2532, "encoder_q-layer.9": 2344.3945, "epoch": 0.62, "inbatch_neg_score": 0.5517, "inbatch_pos_score": 1.1348, "learning_rate": 2.6111111111111113e-06, "loss": 3.7233, "norm_diff": 0.1367, "norm_loss": 0.0, "num_token_doc": 66.6942, "num_token_overlap": 11.6653, "num_token_query": 31.4285, "num_token_union": 65.0934, "num_word_context": 202.3859, "num_word_doc": 49.7761, "num_word_query": 23.3428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4395.3157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5532, "query_norm": 1.398, "queue_k_norm": 1.5236, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4285, "sent_len_1": 66.6942, "sent_len_max_0": 127.5337, "sent_len_max_1": 188.0213, "stdk": 0.0485, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.6971, "doc_norm": 1.5316, "encoder_q-embeddings": 4800.4727, "encoder_q-layer.0": 3117.27, "encoder_q-layer.1": 3318.2971, "encoder_q-layer.10": 4768.7471, "encoder_q-layer.11": 12912.666, "encoder_q-layer.2": 3814.9924, "encoder_q-layer.3": 4161.4644, "encoder_q-layer.4": 4536.8335, "encoder_q-layer.5": 4676.2681, "encoder_q-layer.6": 4860.0566, "encoder_q-layer.7": 4811.9629, "encoder_q-layer.8": 5468.1021, "encoder_q-layer.9": 4645.4946, "epoch": 0.62, "inbatch_neg_score": 0.5524, "inbatch_pos_score": 1.123, "learning_rate": 2.5555555555555557e-06, "loss": 3.6971, "norm_diff": 0.1373, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 11.6544, "num_token_query": 31.2508, "num_token_union": 64.9601, "num_word_context": 201.8568, "num_word_doc": 49.7051, "num_word_query": 23.1954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8929.8889, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5537, "query_norm": 1.3943, "queue_k_norm": 1.5251, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2508, "sent_len_1": 66.627, "sent_len_max_0": 127.3375, "sent_len_max_1": 191.0775, "stdk": 0.0484, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.6963, "doc_norm": 1.5304, "encoder_q-embeddings": 4751.2163, "encoder_q-layer.0": 3368.8145, "encoder_q-layer.1": 3683.4761, "encoder_q-layer.10": 5182.0112, "encoder_q-layer.11": 13339.7998, "encoder_q-layer.2": 4242.897, "encoder_q-layer.3": 4315.6367, "encoder_q-layer.4": 4254.2568, "encoder_q-layer.5": 4362.0396, "encoder_q-layer.6": 4391.3242, "encoder_q-layer.7": 4703.2485, "encoder_q-layer.8": 5275.1211, "encoder_q-layer.9": 4896.3618, "epoch": 0.62, "inbatch_neg_score": 0.5521, "inbatch_pos_score": 1.127, "learning_rate": 2.5e-06, "loss": 3.6963, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 66.7516, "num_token_overlap": 11.6207, "num_token_query": 31.2109, "num_token_union": 65.0277, "num_word_context": 202.2554, "num_word_doc": 49.7888, "num_word_query": 23.165, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8900.0707, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5532, "query_norm": 1.4053, "queue_k_norm": 1.5254, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2109, "sent_len_1": 66.7516, "sent_len_max_0": 127.3937, "sent_len_max_1": 188.27, "stdk": 0.0483, "stdq": 0.0431, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.7151, "doc_norm": 1.5243, "encoder_q-embeddings": 5611.9385, "encoder_q-layer.0": 3890.2734, "encoder_q-layer.1": 4431.4316, "encoder_q-layer.10": 5173.5703, "encoder_q-layer.11": 13631.9727, "encoder_q-layer.2": 5138.1636, "encoder_q-layer.3": 5400.7563, "encoder_q-layer.4": 5253.5625, "encoder_q-layer.5": 5200.5791, "encoder_q-layer.6": 5072.085, "encoder_q-layer.7": 5403.9878, "encoder_q-layer.8": 5880.9092, "encoder_q-layer.9": 4923.3433, "epoch": 0.62, "inbatch_neg_score": 0.5531, "inbatch_pos_score": 1.1367, "learning_rate": 2.4444444444444447e-06, "loss": 3.7151, "norm_diff": 0.1346, "norm_loss": 0.0, "num_token_doc": 66.7511, "num_token_overlap": 11.6704, "num_token_query": 31.3738, "num_token_union": 65.0888, "num_word_context": 201.8843, "num_word_doc": 49.8236, "num_word_query": 23.3057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9682.6922, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5537, "query_norm": 1.3897, "queue_k_norm": 1.5247, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3738, "sent_len_1": 66.7511, "sent_len_max_0": 127.4562, "sent_len_max_1": 190.4087, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7038, "doc_norm": 1.5247, "encoder_q-embeddings": 4023.7649, "encoder_q-layer.0": 2679.9795, "encoder_q-layer.1": 2736.3276, "encoder_q-layer.10": 5000.6641, "encoder_q-layer.11": 12885.9756, "encoder_q-layer.2": 3050.4583, "encoder_q-layer.3": 3137.3384, "encoder_q-layer.4": 3297.2112, "encoder_q-layer.5": 3301.3228, "encoder_q-layer.6": 3760.0405, "encoder_q-layer.7": 3982.2671, "encoder_q-layer.8": 4972.8647, "encoder_q-layer.9": 4810.5146, "epoch": 0.62, "inbatch_neg_score": 0.5557, "inbatch_pos_score": 1.124, "learning_rate": 2.388888888888889e-06, "loss": 3.7038, "norm_diff": 0.1367, "norm_loss": 0.0, "num_token_doc": 66.7376, "num_token_overlap": 11.6629, "num_token_query": 31.2798, "num_token_union": 64.9854, "num_word_context": 202.1834, "num_word_doc": 49.779, "num_word_query": 23.2228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8145.2503, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5562, "query_norm": 1.388, "queue_k_norm": 1.5237, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.2798, "sent_len_1": 66.7376, "sent_len_max_0": 127.535, "sent_len_max_1": 188.8388, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.7083, "doc_norm": 1.5126, "encoder_q-embeddings": 2590.1826, "encoder_q-layer.0": 1753.4486, "encoder_q-layer.1": 1942.5435, "encoder_q-layer.10": 2455.6719, "encoder_q-layer.11": 6571.1328, "encoder_q-layer.2": 2230.9827, "encoder_q-layer.3": 2317.9688, "encoder_q-layer.4": 2371.4065, "encoder_q-layer.5": 2512.342, "encoder_q-layer.6": 2665.5942, "encoder_q-layer.7": 2720.8777, "encoder_q-layer.8": 2856.9399, "encoder_q-layer.9": 2322.0378, "epoch": 0.62, "inbatch_neg_score": 0.5555, "inbatch_pos_score": 1.125, "learning_rate": 2.3333333333333336e-06, "loss": 3.7083, "norm_diff": 0.1188, "norm_loss": 0.0, "num_token_doc": 66.7464, "num_token_overlap": 11.6343, "num_token_query": 31.2349, "num_token_union": 65.0279, "num_word_context": 202.2853, "num_word_doc": 49.8351, "num_word_query": 23.1774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4618.3895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5552, "query_norm": 1.3938, "queue_k_norm": 1.5247, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2349, "sent_len_1": 66.7464, "sent_len_max_0": 127.5787, "sent_len_max_1": 188.6987, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7081, "doc_norm": 1.5294, "encoder_q-embeddings": 2137.7363, "encoder_q-layer.0": 1431.2924, "encoder_q-layer.1": 1549.2815, "encoder_q-layer.10": 2645.8816, "encoder_q-layer.11": 6740.1538, "encoder_q-layer.2": 1699.2352, "encoder_q-layer.3": 1682.8486, "encoder_q-layer.4": 1818.8469, "encoder_q-layer.5": 1833.5599, "encoder_q-layer.6": 1933.3315, "encoder_q-layer.7": 2103.093, "encoder_q-layer.8": 2609.4109, "encoder_q-layer.9": 2453.4097, "epoch": 0.62, "inbatch_neg_score": 0.5566, "inbatch_pos_score": 1.1221, "learning_rate": 2.277777777777778e-06, "loss": 3.7081, "norm_diff": 0.1467, "norm_loss": 0.0, "num_token_doc": 66.6116, "num_token_overlap": 11.6601, "num_token_query": 31.2378, "num_token_union": 64.9615, "num_word_context": 202.106, "num_word_doc": 49.674, "num_word_query": 23.1974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4254.0782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.3827, "queue_k_norm": 1.5247, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2378, "sent_len_1": 66.6116, "sent_len_max_0": 127.3125, "sent_len_max_1": 190.93, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.6656, "doc_norm": 1.5209, "encoder_q-embeddings": 3044.7358, "encoder_q-layer.0": 2089.2754, "encoder_q-layer.1": 2267.3264, "encoder_q-layer.10": 2548.9275, "encoder_q-layer.11": 6611.3389, "encoder_q-layer.2": 2678.4375, "encoder_q-layer.3": 2877.3557, "encoder_q-layer.4": 2952.8477, "encoder_q-layer.5": 3149.0496, "encoder_q-layer.6": 3485.8733, "encoder_q-layer.7": 3285.0139, "encoder_q-layer.8": 3001.8237, "encoder_q-layer.9": 2449.4482, "epoch": 0.62, "inbatch_neg_score": 0.5556, "inbatch_pos_score": 1.1299, "learning_rate": 2.2222222222222225e-06, "loss": 3.6656, "norm_diff": 0.1267, "norm_loss": 0.0, "num_token_doc": 66.8784, "num_token_overlap": 11.6966, "num_token_query": 31.397, "num_token_union": 65.1677, "num_word_context": 202.1449, "num_word_doc": 49.8672, "num_word_query": 23.2916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5080.596, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5562, "query_norm": 1.3942, "queue_k_norm": 1.5256, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.397, "sent_len_1": 66.8784, "sent_len_max_0": 127.4488, "sent_len_max_1": 189.2425, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.7015, "doc_norm": 1.5267, "encoder_q-embeddings": 2413.239, "encoder_q-layer.0": 1680.5369, "encoder_q-layer.1": 1813.1807, "encoder_q-layer.10": 2349.8191, "encoder_q-layer.11": 6413.873, "encoder_q-layer.2": 2129.4438, "encoder_q-layer.3": 2179.2932, "encoder_q-layer.4": 2208.9353, "encoder_q-layer.5": 2212.0317, "encoder_q-layer.6": 2535.009, "encoder_q-layer.7": 2637.6282, "encoder_q-layer.8": 2763.4331, "encoder_q-layer.9": 2340.6509, "epoch": 0.63, "inbatch_neg_score": 0.5533, "inbatch_pos_score": 1.1367, "learning_rate": 2.166666666666667e-06, "loss": 3.7015, "norm_diff": 0.1337, "norm_loss": 0.0, "num_token_doc": 66.768, "num_token_overlap": 11.6578, "num_token_query": 31.4036, "num_token_union": 65.1875, "num_word_context": 202.4154, "num_word_doc": 49.8674, "num_word_query": 23.3117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4453.91, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5547, "query_norm": 1.393, "queue_k_norm": 1.5279, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4036, "sent_len_1": 66.768, "sent_len_max_0": 127.6038, "sent_len_max_1": 189.2788, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.7027, "doc_norm": 1.5278, "encoder_q-embeddings": 4461.2295, "encoder_q-layer.0": 3067.5254, "encoder_q-layer.1": 3487.9043, "encoder_q-layer.10": 2372.4534, "encoder_q-layer.11": 6383.083, "encoder_q-layer.2": 4483.103, "encoder_q-layer.3": 4372.145, "encoder_q-layer.4": 4763.2334, "encoder_q-layer.5": 4418.1592, "encoder_q-layer.6": 3823.1775, "encoder_q-layer.7": 3309.3264, "encoder_q-layer.8": 2748.3284, "encoder_q-layer.9": 2295.0183, "epoch": 0.63, "inbatch_neg_score": 0.5584, "inbatch_pos_score": 1.1387, "learning_rate": 2.1111111111111114e-06, "loss": 3.7027, "norm_diff": 0.1312, "norm_loss": 0.0, "num_token_doc": 66.5438, "num_token_overlap": 11.5858, "num_token_query": 31.2126, "num_token_union": 64.9572, "num_word_context": 202.3932, "num_word_doc": 49.6511, "num_word_query": 23.1673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6241.3176, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5586, "query_norm": 1.3966, "queue_k_norm": 1.5247, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2126, "sent_len_1": 66.5438, "sent_len_max_0": 127.3775, "sent_len_max_1": 189.5925, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7008, "doc_norm": 1.5214, "encoder_q-embeddings": 2062.2114, "encoder_q-layer.0": 1391.7334, "encoder_q-layer.1": 1446.0673, "encoder_q-layer.10": 2470.9141, "encoder_q-layer.11": 6489.1582, "encoder_q-layer.2": 1654.7513, "encoder_q-layer.3": 1714.0372, "encoder_q-layer.4": 1813.4423, "encoder_q-layer.5": 1758.7234, "encoder_q-layer.6": 2013.9991, "encoder_q-layer.7": 2167.6042, "encoder_q-layer.8": 2514.6196, "encoder_q-layer.9": 2346.2061, "epoch": 0.63, "inbatch_neg_score": 0.5579, "inbatch_pos_score": 1.125, "learning_rate": 2.055555555555556e-06, "loss": 3.7008, "norm_diff": 0.1259, "norm_loss": 0.0, "num_token_doc": 66.7121, "num_token_overlap": 11.6852, "num_token_query": 31.3718, "num_token_union": 65.0592, "num_word_context": 202.2133, "num_word_doc": 49.7855, "num_word_query": 23.2889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4149.0437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5576, "query_norm": 1.3955, "queue_k_norm": 1.525, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3718, "sent_len_1": 66.7121, "sent_len_max_0": 127.4737, "sent_len_max_1": 189.4588, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7044, "doc_norm": 1.5301, "encoder_q-embeddings": 2395.9741, "encoder_q-layer.0": 1654.0609, "encoder_q-layer.1": 1836.1787, "encoder_q-layer.10": 2596.6357, "encoder_q-layer.11": 6565.0601, "encoder_q-layer.2": 2105.0288, "encoder_q-layer.3": 2160.9697, "encoder_q-layer.4": 2447.762, "encoder_q-layer.5": 2420.4023, "encoder_q-layer.6": 2491.7131, "encoder_q-layer.7": 2746.9668, "encoder_q-layer.8": 2614.6592, "encoder_q-layer.9": 2347.6665, "epoch": 0.63, "inbatch_neg_score": 0.5561, "inbatch_pos_score": 1.1426, "learning_rate": 2.0000000000000003e-06, "loss": 3.7044, "norm_diff": 0.1377, "norm_loss": 0.0, "num_token_doc": 66.8759, "num_token_overlap": 11.6809, "num_token_query": 31.398, "num_token_union": 65.1533, "num_word_context": 202.5089, "num_word_doc": 49.8978, "num_word_query": 23.3223, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4543.7832, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5562, "query_norm": 1.3924, "queue_k_norm": 1.5269, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.398, "sent_len_1": 66.8759, "sent_len_max_0": 127.5675, "sent_len_max_1": 189.8512, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7034, "doc_norm": 1.5259, "encoder_q-embeddings": 2052.1008, "encoder_q-layer.0": 1419.6606, "encoder_q-layer.1": 1568.3134, "encoder_q-layer.10": 2555.6755, "encoder_q-layer.11": 6547.1738, "encoder_q-layer.2": 1738.3987, "encoder_q-layer.3": 1773.4117, "encoder_q-layer.4": 1782.7115, "encoder_q-layer.5": 1807.9856, "encoder_q-layer.6": 2000.5615, "encoder_q-layer.7": 2269.248, "encoder_q-layer.8": 2494.1655, "encoder_q-layer.9": 2354.7761, "epoch": 0.63, "inbatch_neg_score": 0.5564, "inbatch_pos_score": 1.1309, "learning_rate": 1.9444444444444444e-06, "loss": 3.7034, "norm_diff": 0.1289, "norm_loss": 0.0, "num_token_doc": 66.8256, "num_token_overlap": 11.6591, "num_token_query": 31.4064, "num_token_union": 65.1819, "num_word_context": 202.3493, "num_word_doc": 49.8433, "num_word_query": 23.3363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4174.1327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.3969, "queue_k_norm": 1.5276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4064, "sent_len_1": 66.8256, "sent_len_max_0": 127.5175, "sent_len_max_1": 188.7175, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.715, "doc_norm": 1.5204, "encoder_q-embeddings": 2476.0774, "encoder_q-layer.0": 1707.0323, "encoder_q-layer.1": 1843.5212, "encoder_q-layer.10": 2592.2246, "encoder_q-layer.11": 6884.0435, "encoder_q-layer.2": 2028.4252, "encoder_q-layer.3": 2148.5386, "encoder_q-layer.4": 2343.5044, "encoder_q-layer.5": 2439.4961, "encoder_q-layer.6": 2352.3494, "encoder_q-layer.7": 2450.4146, "encoder_q-layer.8": 2679.741, "encoder_q-layer.9": 2470.1958, "epoch": 0.63, "inbatch_neg_score": 0.5588, "inbatch_pos_score": 1.1045, "learning_rate": 1.888888888888889e-06, "loss": 3.715, "norm_diff": 0.1349, "norm_loss": 0.0, "num_token_doc": 66.5368, "num_token_overlap": 11.6566, "num_token_query": 31.3728, "num_token_union": 64.9751, "num_word_context": 201.926, "num_word_doc": 49.6857, "num_word_query": 23.2979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4579.9377, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5581, "query_norm": 1.3855, "queue_k_norm": 1.5268, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3728, "sent_len_1": 66.5368, "sent_len_max_0": 127.3312, "sent_len_max_1": 189.185, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.6956, "doc_norm": 1.5302, "encoder_q-embeddings": 2782.147, "encoder_q-layer.0": 1838.2251, "encoder_q-layer.1": 2052.0684, "encoder_q-layer.10": 2767.7585, "encoder_q-layer.11": 6639.332, "encoder_q-layer.2": 2283.4734, "encoder_q-layer.3": 2397.2805, "encoder_q-layer.4": 2515.1799, "encoder_q-layer.5": 2342.5156, "encoder_q-layer.6": 2314.8154, "encoder_q-layer.7": 2531.1926, "encoder_q-layer.8": 2693.3662, "encoder_q-layer.9": 2433.5183, "epoch": 0.63, "inbatch_neg_score": 0.5587, "inbatch_pos_score": 1.1445, "learning_rate": 1.8333333333333335e-06, "loss": 3.6956, "norm_diff": 0.1335, "norm_loss": 0.0, "num_token_doc": 66.855, "num_token_overlap": 11.6875, "num_token_query": 31.382, "num_token_union": 65.1908, "num_word_context": 202.396, "num_word_doc": 49.8913, "num_word_query": 23.3171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4653.3431, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.3966, "queue_k_norm": 1.5262, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.382, "sent_len_1": 66.855, "sent_len_max_0": 127.3475, "sent_len_max_1": 189.7363, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.6972, "doc_norm": 1.5263, "encoder_q-embeddings": 2777.282, "encoder_q-layer.0": 1931.5209, "encoder_q-layer.1": 2114.5569, "encoder_q-layer.10": 2630.5283, "encoder_q-layer.11": 6856.3345, "encoder_q-layer.2": 2391.0361, "encoder_q-layer.3": 2534.6123, "encoder_q-layer.4": 2761.3784, "encoder_q-layer.5": 2810.9287, "encoder_q-layer.6": 2844.0884, "encoder_q-layer.7": 2757.3591, "encoder_q-layer.8": 2860.9355, "encoder_q-layer.9": 2535.3928, "epoch": 0.63, "inbatch_neg_score": 0.5558, "inbatch_pos_score": 1.1162, "learning_rate": 1.777777777777778e-06, "loss": 3.6972, "norm_diff": 0.1385, "norm_loss": 0.0, "num_token_doc": 66.6772, "num_token_overlap": 11.7024, "num_token_query": 31.306, "num_token_union": 64.9521, "num_word_context": 202.1247, "num_word_doc": 49.8003, "num_word_query": 23.2645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4878.3233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5571, "query_norm": 1.3878, "queue_k_norm": 1.525, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.306, "sent_len_1": 66.6772, "sent_len_max_0": 127.5387, "sent_len_max_1": 189.5975, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.7125, "doc_norm": 1.5345, "encoder_q-embeddings": 1928.5609, "encoder_q-layer.0": 1299.187, "encoder_q-layer.1": 1352.0989, "encoder_q-layer.10": 2524.0811, "encoder_q-layer.11": 6657.7207, "encoder_q-layer.2": 1563.8966, "encoder_q-layer.3": 1597.744, "encoder_q-layer.4": 1748.5624, "encoder_q-layer.5": 1849.33, "encoder_q-layer.6": 2032.4696, "encoder_q-layer.7": 2300.9185, "encoder_q-layer.8": 2566.2024, "encoder_q-layer.9": 2424.7319, "epoch": 0.63, "inbatch_neg_score": 0.5574, "inbatch_pos_score": 1.1533, "learning_rate": 1.7222222222222222e-06, "loss": 3.7125, "norm_diff": 0.1446, "norm_loss": 0.0, "num_token_doc": 66.8634, "num_token_overlap": 11.6475, "num_token_query": 31.3421, "num_token_union": 65.2001, "num_word_context": 202.9098, "num_word_doc": 49.8593, "num_word_query": 23.263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4183.1217, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5576, "query_norm": 1.3899, "queue_k_norm": 1.526, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3421, "sent_len_1": 66.8634, "sent_len_max_0": 127.3012, "sent_len_max_1": 190.4975, "stdk": 0.0485, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.6966, "doc_norm": 1.5294, "encoder_q-embeddings": 2935.0439, "encoder_q-layer.0": 2058.0901, "encoder_q-layer.1": 2371.9705, "encoder_q-layer.10": 2945.4709, "encoder_q-layer.11": 6735.188, "encoder_q-layer.2": 2746.4016, "encoder_q-layer.3": 2867.3201, "encoder_q-layer.4": 3042.896, "encoder_q-layer.5": 2796.1362, "encoder_q-layer.6": 2612.6741, "encoder_q-layer.7": 2604.9912, "encoder_q-layer.8": 2866.238, "encoder_q-layer.9": 2623.5798, "epoch": 0.63, "inbatch_neg_score": 0.5584, "inbatch_pos_score": 1.1416, "learning_rate": 1.6666666666666667e-06, "loss": 3.6966, "norm_diff": 0.1343, "norm_loss": 0.0, "num_token_doc": 66.8336, "num_token_overlap": 11.6721, "num_token_query": 31.3252, "num_token_union": 65.1595, "num_word_context": 202.51, "num_word_doc": 49.8912, "num_word_query": 23.2338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4915.2519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.395, "queue_k_norm": 1.5273, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3252, "sent_len_1": 66.8336, "sent_len_max_0": 127.4475, "sent_len_max_1": 188.52, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.7004, "doc_norm": 1.5291, "encoder_q-embeddings": 2648.061, "encoder_q-layer.0": 1777.9091, "encoder_q-layer.1": 1909.7238, "encoder_q-layer.10": 2563.2112, "encoder_q-layer.11": 6280.6226, "encoder_q-layer.2": 2259.2102, "encoder_q-layer.3": 2280.2517, "encoder_q-layer.4": 2350.3936, "encoder_q-layer.5": 2307.071, "encoder_q-layer.6": 2304.2576, "encoder_q-layer.7": 2441.6294, "encoder_q-layer.8": 2584.9775, "encoder_q-layer.9": 2313.6177, "epoch": 0.63, "inbatch_neg_score": 0.5561, "inbatch_pos_score": 1.1299, "learning_rate": 1.6111111111111111e-06, "loss": 3.7004, "norm_diff": 0.1366, "norm_loss": 0.0, "num_token_doc": 66.8505, "num_token_overlap": 11.743, "num_token_query": 31.4919, "num_token_union": 65.2129, "num_word_context": 202.2801, "num_word_doc": 49.8786, "num_word_query": 23.3991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4460.3899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.3924, "queue_k_norm": 1.5283, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4919, "sent_len_1": 66.8505, "sent_len_max_0": 127.2775, "sent_len_max_1": 188.985, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.7301, "doc_norm": 1.519, "encoder_q-embeddings": 2170.178, "encoder_q-layer.0": 1407.0562, "encoder_q-layer.1": 1530.2748, "encoder_q-layer.10": 2598.5916, "encoder_q-layer.11": 6591.1172, "encoder_q-layer.2": 1769.6685, "encoder_q-layer.3": 1761.3743, "encoder_q-layer.4": 1868.5328, "encoder_q-layer.5": 1791.0142, "encoder_q-layer.6": 2017.2419, "encoder_q-layer.7": 2302.0276, "encoder_q-layer.8": 2554.2939, "encoder_q-layer.9": 2399.1621, "epoch": 0.63, "inbatch_neg_score": 0.5578, "inbatch_pos_score": 1.1016, "learning_rate": 1.5555555555555556e-06, "loss": 3.7301, "norm_diff": 0.136, "norm_loss": 0.0, "num_token_doc": 66.6838, "num_token_overlap": 11.6096, "num_token_query": 31.2266, "num_token_union": 65.0172, "num_word_context": 202.1561, "num_word_doc": 49.7348, "num_word_query": 23.1788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4255.5416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5581, "query_norm": 1.383, "queue_k_norm": 1.5251, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2266, "sent_len_1": 66.6838, "sent_len_max_0": 127.6088, "sent_len_max_1": 190.3625, "stdk": 0.0478, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.6946, "doc_norm": 1.5313, "encoder_q-embeddings": 2148.8086, "encoder_q-layer.0": 1483.1095, "encoder_q-layer.1": 1600.0403, "encoder_q-layer.10": 2599.2749, "encoder_q-layer.11": 6672.7051, "encoder_q-layer.2": 1804.048, "encoder_q-layer.3": 1869.063, "encoder_q-layer.4": 1972.8573, "encoder_q-layer.5": 1984.2151, "encoder_q-layer.6": 2107.5273, "encoder_q-layer.7": 2310.134, "encoder_q-layer.8": 2456.5325, "encoder_q-layer.9": 2370.9653, "epoch": 0.63, "inbatch_neg_score": 0.5568, "inbatch_pos_score": 1.1396, "learning_rate": 1.5e-06, "loss": 3.6946, "norm_diff": 0.139, "norm_loss": 0.0, "num_token_doc": 66.7053, "num_token_overlap": 11.6534, "num_token_query": 31.3909, "num_token_union": 65.1179, "num_word_context": 202.5627, "num_word_doc": 49.7867, "num_word_query": 23.3174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4309.4924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5576, "query_norm": 1.3923, "queue_k_norm": 1.5277, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3909, "sent_len_1": 66.7053, "sent_len_max_0": 127.39, "sent_len_max_1": 191.31, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.7155, "doc_norm": 1.5307, "encoder_q-embeddings": 2587.2673, "encoder_q-layer.0": 1753.6696, "encoder_q-layer.1": 1883.7139, "encoder_q-layer.10": 2425.2537, "encoder_q-layer.11": 6322.7617, "encoder_q-layer.2": 2073.8889, "encoder_q-layer.3": 2381.7625, "encoder_q-layer.4": 2520.6633, "encoder_q-layer.5": 2354.3623, "encoder_q-layer.6": 2656.0681, "encoder_q-layer.7": 2744.4788, "encoder_q-layer.8": 2721.1611, "encoder_q-layer.9": 2379.0833, "epoch": 0.63, "inbatch_neg_score": 0.5582, "inbatch_pos_score": 1.1436, "learning_rate": 1.4444444444444445e-06, "loss": 3.7155, "norm_diff": 0.1353, "norm_loss": 0.0, "num_token_doc": 66.7644, "num_token_overlap": 11.7447, "num_token_query": 31.4854, "num_token_union": 65.1101, "num_word_context": 202.2552, "num_word_doc": 49.8021, "num_word_query": 23.4082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4527.4998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.3953, "queue_k_norm": 1.5274, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4854, "sent_len_1": 66.7644, "sent_len_max_0": 127.5563, "sent_len_max_1": 191.2775, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.7001, "doc_norm": 1.5214, "encoder_q-embeddings": 2493.8879, "encoder_q-layer.0": 1824.4293, "encoder_q-layer.1": 1908.2173, "encoder_q-layer.10": 2699.4219, "encoder_q-layer.11": 6471.1191, "encoder_q-layer.2": 2350.4941, "encoder_q-layer.3": 2449.8755, "encoder_q-layer.4": 2541.9194, "encoder_q-layer.5": 2590.905, "encoder_q-layer.6": 2709.0137, "encoder_q-layer.7": 2741.8394, "encoder_q-layer.8": 2888.9265, "encoder_q-layer.9": 2398.7036, "epoch": 0.63, "inbatch_neg_score": 0.5595, "inbatch_pos_score": 1.123, "learning_rate": 1.388888888888889e-06, "loss": 3.7001, "norm_diff": 0.1334, "norm_loss": 0.0, "num_token_doc": 66.8347, "num_token_overlap": 11.6878, "num_token_query": 31.319, "num_token_union": 65.115, "num_word_context": 202.012, "num_word_doc": 49.8678, "num_word_query": 23.256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4652.2875, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.3881, "queue_k_norm": 1.5269, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.319, "sent_len_1": 66.8347, "sent_len_max_0": 127.37, "sent_len_max_1": 191.5687, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.7247, "doc_norm": 1.522, "encoder_q-embeddings": 2987.4524, "encoder_q-layer.0": 2024.0468, "encoder_q-layer.1": 2212.7854, "encoder_q-layer.10": 2438.1008, "encoder_q-layer.11": 6488.8931, "encoder_q-layer.2": 2498.3972, "encoder_q-layer.3": 2589.4688, "encoder_q-layer.4": 2723.0603, "encoder_q-layer.5": 2616.5288, "encoder_q-layer.6": 2500.9568, "encoder_q-layer.7": 2565.5547, "encoder_q-layer.8": 2610.9326, "encoder_q-layer.9": 2252.75, "epoch": 0.64, "inbatch_neg_score": 0.5582, "inbatch_pos_score": 1.125, "learning_rate": 1.3333333333333334e-06, "loss": 3.7247, "norm_diff": 0.1431, "norm_loss": 0.0, "num_token_doc": 66.7538, "num_token_overlap": 11.6334, "num_token_query": 31.2896, "num_token_union": 65.1027, "num_word_context": 202.3389, "num_word_doc": 49.7679, "num_word_query": 23.2351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4735.563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.3789, "queue_k_norm": 1.5275, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2896, "sent_len_1": 66.7538, "sent_len_max_0": 127.6075, "sent_len_max_1": 189.4263, "stdk": 0.0479, "stdq": 0.0418, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.6963, "doc_norm": 1.5294, "encoder_q-embeddings": 8101.1851, "encoder_q-layer.0": 5229.8589, "encoder_q-layer.1": 4884.9731, "encoder_q-layer.10": 2490.2336, "encoder_q-layer.11": 6381.8008, "encoder_q-layer.2": 5750.1318, "encoder_q-layer.3": 5770.7236, "encoder_q-layer.4": 5981.1318, "encoder_q-layer.5": 5434.1694, "encoder_q-layer.6": 5008.9404, "encoder_q-layer.7": 4482.2578, "encoder_q-layer.8": 3704.313, "encoder_q-layer.9": 2439.6389, "epoch": 0.64, "inbatch_neg_score": 0.5607, "inbatch_pos_score": 1.1387, "learning_rate": 1.2777777777777779e-06, "loss": 3.6963, "norm_diff": 0.1397, "norm_loss": 0.0, "num_token_doc": 67.0479, "num_token_overlap": 11.6593, "num_token_query": 31.3099, "num_token_union": 65.2112, "num_word_context": 202.4287, "num_word_doc": 50.0006, "num_word_query": 23.2709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8284.8147, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5601, "query_norm": 1.3898, "queue_k_norm": 1.5259, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.3099, "sent_len_1": 67.0479, "sent_len_max_0": 127.3713, "sent_len_max_1": 190.435, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7036, "doc_norm": 1.5236, "encoder_q-embeddings": 2999.9956, "encoder_q-layer.0": 1977.2616, "encoder_q-layer.1": 2236.4971, "encoder_q-layer.10": 2903.1433, "encoder_q-layer.11": 6754.9736, "encoder_q-layer.2": 2523.0283, "encoder_q-layer.3": 2381.6206, "encoder_q-layer.4": 2558.4417, "encoder_q-layer.5": 2260.252, "encoder_q-layer.6": 2495.9895, "encoder_q-layer.7": 2525.2419, "encoder_q-layer.8": 2731.5425, "encoder_q-layer.9": 2465.9622, "epoch": 0.64, "inbatch_neg_score": 0.5585, "inbatch_pos_score": 1.1348, "learning_rate": 1.2222222222222223e-06, "loss": 3.7036, "norm_diff": 0.1271, "norm_loss": 0.0, "num_token_doc": 66.8547, "num_token_overlap": 11.6369, "num_token_query": 31.3708, "num_token_union": 65.1927, "num_word_context": 202.4221, "num_word_doc": 49.8746, "num_word_query": 23.2815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4806.6042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5591, "query_norm": 1.3965, "queue_k_norm": 1.5283, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3708, "sent_len_1": 66.8547, "sent_len_max_0": 127.4975, "sent_len_max_1": 188.4863, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.7063, "doc_norm": 1.5284, "encoder_q-embeddings": 2286.6177, "encoder_q-layer.0": 1538.3413, "encoder_q-layer.1": 1639.9189, "encoder_q-layer.10": 2452.8967, "encoder_q-layer.11": 6316.3306, "encoder_q-layer.2": 1847.6152, "encoder_q-layer.3": 1895.3036, "encoder_q-layer.4": 2036.5277, "encoder_q-layer.5": 2106.5935, "encoder_q-layer.6": 2090.7874, "encoder_q-layer.7": 2240.5918, "encoder_q-layer.8": 2451.27, "encoder_q-layer.9": 2287.4883, "epoch": 0.64, "inbatch_neg_score": 0.5591, "inbatch_pos_score": 1.1494, "learning_rate": 1.1666666666666668e-06, "loss": 3.7063, "norm_diff": 0.1343, "norm_loss": 0.0, "num_token_doc": 66.6181, "num_token_overlap": 11.6583, "num_token_query": 31.4353, "num_token_union": 65.0608, "num_word_context": 202.0404, "num_word_doc": 49.6938, "num_word_query": 23.3586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4198.0308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.3941, "queue_k_norm": 1.5283, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4353, "sent_len_1": 66.6181, "sent_len_max_0": 127.5175, "sent_len_max_1": 189.0637, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6963, "doc_norm": 1.5307, "encoder_q-embeddings": 3415.884, "encoder_q-layer.0": 2423.0457, "encoder_q-layer.1": 2541.655, "encoder_q-layer.10": 2876.6128, "encoder_q-layer.11": 7166.4497, "encoder_q-layer.2": 3136.0742, "encoder_q-layer.3": 3120.6472, "encoder_q-layer.4": 2924.9773, "encoder_q-layer.5": 2760.4614, "encoder_q-layer.6": 2997.655, "encoder_q-layer.7": 2925.8118, "encoder_q-layer.8": 3104.3745, "encoder_q-layer.9": 2758.635, "epoch": 0.64, "inbatch_neg_score": 0.5594, "inbatch_pos_score": 1.1445, "learning_rate": 1.1111111111111112e-06, "loss": 3.6963, "norm_diff": 0.1283, "norm_loss": 0.0, "num_token_doc": 66.6841, "num_token_overlap": 11.6597, "num_token_query": 31.2807, "num_token_union": 65.0127, "num_word_context": 202.2292, "num_word_doc": 49.7486, "num_word_query": 23.2204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5352.7469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.4024, "queue_k_norm": 1.5271, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.2807, "sent_len_1": 66.6841, "sent_len_max_0": 127.46, "sent_len_max_1": 189.7937, "stdk": 0.0483, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.7075, "doc_norm": 1.5301, "encoder_q-embeddings": 11179.626, "encoder_q-layer.0": 8615.875, "encoder_q-layer.1": 8614.2666, "encoder_q-layer.10": 2692.7783, "encoder_q-layer.11": 6914.0757, "encoder_q-layer.2": 10717.1855, "encoder_q-layer.3": 10963.7646, "encoder_q-layer.4": 9192.3271, "encoder_q-layer.5": 10091.2676, "encoder_q-layer.6": 6821.354, "encoder_q-layer.7": 7111.4468, "encoder_q-layer.8": 5767.0103, "encoder_q-layer.9": 3005.7356, "epoch": 0.64, "inbatch_neg_score": 0.5594, "inbatch_pos_score": 1.1377, "learning_rate": 1.0555555555555557e-06, "loss": 3.7075, "norm_diff": 0.1313, "norm_loss": 0.0, "num_token_doc": 66.8137, "num_token_overlap": 11.6478, "num_token_query": 31.4213, "num_token_union": 65.2108, "num_word_context": 202.6598, "num_word_doc": 49.8898, "num_word_query": 23.3525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12833.6701, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5601, "query_norm": 1.3988, "queue_k_norm": 1.5281, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4213, "sent_len_1": 66.8137, "sent_len_max_0": 127.4625, "sent_len_max_1": 187.8713, "stdk": 0.0483, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.7062, "doc_norm": 1.5311, "encoder_q-embeddings": 2792.1746, "encoder_q-layer.0": 1842.3125, "encoder_q-layer.1": 1995.3706, "encoder_q-layer.10": 2518.0891, "encoder_q-layer.11": 6527.1089, "encoder_q-layer.2": 2294.8403, "encoder_q-layer.3": 2438.1091, "encoder_q-layer.4": 2559.6416, "encoder_q-layer.5": 2755.311, "encoder_q-layer.6": 2722.3164, "encoder_q-layer.7": 2727.2329, "encoder_q-layer.8": 2872.6965, "encoder_q-layer.9": 2478.9888, "epoch": 0.64, "inbatch_neg_score": 0.5628, "inbatch_pos_score": 1.1475, "learning_rate": 1.0000000000000002e-06, "loss": 3.7062, "norm_diff": 0.1357, "norm_loss": 0.0, "num_token_doc": 66.6513, "num_token_overlap": 11.6698, "num_token_query": 31.4009, "num_token_union": 65.0135, "num_word_context": 201.7881, "num_word_doc": 49.7075, "num_word_query": 23.3089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4709.1763, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3954, "queue_k_norm": 1.5276, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.4009, "sent_len_1": 66.6513, "sent_len_max_0": 127.7262, "sent_len_max_1": 190.1675, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.7072, "doc_norm": 1.5271, "encoder_q-embeddings": 2429.8887, "encoder_q-layer.0": 1628.3997, "encoder_q-layer.1": 1781.218, "encoder_q-layer.10": 2632.7356, "encoder_q-layer.11": 6882.4214, "encoder_q-layer.2": 2031.6902, "encoder_q-layer.3": 2055.9368, "encoder_q-layer.4": 2118.3101, "encoder_q-layer.5": 2065.5864, "encoder_q-layer.6": 2266.7878, "encoder_q-layer.7": 2274.199, "encoder_q-layer.8": 2483.6111, "encoder_q-layer.9": 2421.8564, "epoch": 0.64, "inbatch_neg_score": 0.5622, "inbatch_pos_score": 1.1299, "learning_rate": 9.444444444444445e-07, "loss": 3.7072, "norm_diff": 0.1289, "norm_loss": 0.0, "num_token_doc": 66.7557, "num_token_overlap": 11.6436, "num_token_query": 31.266, "num_token_union": 65.0888, "num_word_context": 202.5144, "num_word_doc": 49.8124, "num_word_query": 23.2169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4462.769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5601, "query_norm": 1.3982, "queue_k_norm": 1.5287, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.266, "sent_len_1": 66.7557, "sent_len_max_0": 127.4537, "sent_len_max_1": 190.76, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7016, "doc_norm": 1.5246, "encoder_q-embeddings": 2786.8787, "encoder_q-layer.0": 1905.839, "encoder_q-layer.1": 2005.6437, "encoder_q-layer.10": 2721.9697, "encoder_q-layer.11": 7034.8135, "encoder_q-layer.2": 2348.3901, "encoder_q-layer.3": 2487.4534, "encoder_q-layer.4": 2408.0974, "encoder_q-layer.5": 2498.5032, "encoder_q-layer.6": 2463.447, "encoder_q-layer.7": 2542.7957, "encoder_q-layer.8": 2725.3884, "encoder_q-layer.9": 2431.6025, "epoch": 0.64, "inbatch_neg_score": 0.5611, "inbatch_pos_score": 1.1367, "learning_rate": 8.88888888888889e-07, "loss": 3.7016, "norm_diff": 0.1401, "norm_loss": 0.0, "num_token_doc": 66.8566, "num_token_overlap": 11.6914, "num_token_query": 31.3669, "num_token_union": 65.1339, "num_word_context": 202.2048, "num_word_doc": 49.8927, "num_word_query": 23.2695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4819.8093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3845, "queue_k_norm": 1.5299, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3669, "sent_len_1": 66.8566, "sent_len_max_0": 127.4287, "sent_len_max_1": 188.705, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.7021, "doc_norm": 1.5267, "encoder_q-embeddings": 5564.1611, "encoder_q-layer.0": 3878.1367, "encoder_q-layer.1": 4135.1138, "encoder_q-layer.10": 2616.7524, "encoder_q-layer.11": 6600.3516, "encoder_q-layer.2": 4990.9268, "encoder_q-layer.3": 5590.7998, "encoder_q-layer.4": 5998.1025, "encoder_q-layer.5": 6329.0513, "encoder_q-layer.6": 6950.9585, "encoder_q-layer.7": 6263.7202, "encoder_q-layer.8": 6800.0488, "encoder_q-layer.9": 3447.2908, "epoch": 0.64, "inbatch_neg_score": 0.559, "inbatch_pos_score": 1.1152, "learning_rate": 8.333333333333333e-07, "loss": 3.7021, "norm_diff": 0.136, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 11.6252, "num_token_query": 31.302, "num_token_union": 65.1313, "num_word_context": 202.3418, "num_word_doc": 49.916, "num_word_query": 23.2573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8327.3747, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5605, "query_norm": 1.3907, "queue_k_norm": 1.5277, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.302, "sent_len_1": 66.8042, "sent_len_max_0": 127.4775, "sent_len_max_1": 187.9437, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6842, "doc_norm": 1.5304, "encoder_q-embeddings": 2249.9397, "encoder_q-layer.0": 1539.2366, "encoder_q-layer.1": 1645.3741, "encoder_q-layer.10": 2425.4397, "encoder_q-layer.11": 6393.0142, "encoder_q-layer.2": 1913.8613, "encoder_q-layer.3": 1983.3588, "encoder_q-layer.4": 2039.1154, "encoder_q-layer.5": 1933.0854, "encoder_q-layer.6": 2153.8616, "encoder_q-layer.7": 2089.7034, "encoder_q-layer.8": 2375.5334, "encoder_q-layer.9": 2280.2102, "epoch": 0.64, "inbatch_neg_score": 0.5604, "inbatch_pos_score": 1.1445, "learning_rate": 7.777777777777778e-07, "loss": 3.6842, "norm_diff": 0.1317, "norm_loss": 0.0, "num_token_doc": 66.8032, "num_token_overlap": 11.714, "num_token_query": 31.5112, "num_token_union": 65.2341, "num_word_context": 202.3457, "num_word_doc": 49.8461, "num_word_query": 23.422, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4208.3211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5601, "query_norm": 1.3987, "queue_k_norm": 1.5286, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.5112, "sent_len_1": 66.8032, "sent_len_max_0": 127.5812, "sent_len_max_1": 187.4638, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.6851, "doc_norm": 1.5321, "encoder_q-embeddings": 2524.9111, "encoder_q-layer.0": 1654.0441, "encoder_q-layer.1": 1798.3431, "encoder_q-layer.10": 2914.2542, "encoder_q-layer.11": 6512.7549, "encoder_q-layer.2": 2035.6306, "encoder_q-layer.3": 2191.4773, "encoder_q-layer.4": 2398.9592, "encoder_q-layer.5": 2474.458, "encoder_q-layer.6": 2653.988, "encoder_q-layer.7": 2788.5085, "encoder_q-layer.8": 2842.0125, "encoder_q-layer.9": 2450.9412, "epoch": 0.64, "inbatch_neg_score": 0.5602, "inbatch_pos_score": 1.1309, "learning_rate": 7.222222222222222e-07, "loss": 3.6851, "norm_diff": 0.1298, "norm_loss": 0.0, "num_token_doc": 66.7308, "num_token_overlap": 11.6654, "num_token_query": 31.3461, "num_token_union": 65.1144, "num_word_context": 202.5934, "num_word_doc": 49.8278, "num_word_query": 23.2739, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4555.622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5601, "query_norm": 1.4024, "queue_k_norm": 1.5285, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.3461, "sent_len_1": 66.7308, "sent_len_max_0": 127.5713, "sent_len_max_1": 189.7363, "stdk": 0.0483, "stdq": 0.0429, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.7247, "doc_norm": 1.5277, "encoder_q-embeddings": 1914.2509, "encoder_q-layer.0": 1308.0536, "encoder_q-layer.1": 1363.9039, "encoder_q-layer.10": 2587.3071, "encoder_q-layer.11": 7029.7373, "encoder_q-layer.2": 1524.054, "encoder_q-layer.3": 1642.0988, "encoder_q-layer.4": 1730.1172, "encoder_q-layer.5": 1704.5569, "encoder_q-layer.6": 1819.3048, "encoder_q-layer.7": 2043.8805, "encoder_q-layer.8": 2446.5942, "encoder_q-layer.9": 2264.1125, "epoch": 0.64, "inbatch_neg_score": 0.5629, "inbatch_pos_score": 1.125, "learning_rate": 6.666666666666667e-07, "loss": 3.7247, "norm_diff": 0.1441, "norm_loss": 0.0, "num_token_doc": 66.601, "num_token_overlap": 11.6551, "num_token_query": 31.3863, "num_token_union": 65.047, "num_word_context": 202.3049, "num_word_doc": 49.6838, "num_word_query": 23.3042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4266.1406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3836, "queue_k_norm": 1.5268, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3863, "sent_len_1": 66.601, "sent_len_max_0": 127.3487, "sent_len_max_1": 190.6087, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6779, "doc_norm": 1.5278, "encoder_q-embeddings": 2640.6975, "encoder_q-layer.0": 1816.1224, "encoder_q-layer.1": 1997.4365, "encoder_q-layer.10": 2875.7886, "encoder_q-layer.11": 6589.7095, "encoder_q-layer.2": 2294.2385, "encoder_q-layer.3": 2247.5959, "encoder_q-layer.4": 2366.9431, "encoder_q-layer.5": 2291.9761, "encoder_q-layer.6": 2584.8682, "encoder_q-layer.7": 2730.1814, "encoder_q-layer.8": 2692.7104, "encoder_q-layer.9": 2431.1262, "epoch": 0.64, "inbatch_neg_score": 0.5618, "inbatch_pos_score": 1.1367, "learning_rate": 6.111111111111112e-07, "loss": 3.6779, "norm_diff": 0.1365, "norm_loss": 0.0, "num_token_doc": 66.5367, "num_token_overlap": 11.7002, "num_token_query": 31.4211, "num_token_union": 64.9703, "num_word_context": 202.0275, "num_word_doc": 49.6412, "num_word_query": 23.3392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4590.6915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3913, "queue_k_norm": 1.5276, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4211, "sent_len_1": 66.5367, "sent_len_max_0": 127.3388, "sent_len_max_1": 189.2537, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6876, "doc_norm": 1.5326, "encoder_q-embeddings": 2313.6704, "encoder_q-layer.0": 1568.5929, "encoder_q-layer.1": 1728.0568, "encoder_q-layer.10": 2943.7866, "encoder_q-layer.11": 6858.5918, "encoder_q-layer.2": 1984.6531, "encoder_q-layer.3": 2094.6914, "encoder_q-layer.4": 2223.6226, "encoder_q-layer.5": 2335.635, "encoder_q-layer.6": 2466.3589, "encoder_q-layer.7": 2339.6348, "encoder_q-layer.8": 2662.8499, "encoder_q-layer.9": 2375.7109, "epoch": 0.64, "inbatch_neg_score": 0.5621, "inbatch_pos_score": 1.1406, "learning_rate": 5.555555555555556e-07, "loss": 3.6876, "norm_diff": 0.1417, "norm_loss": 0.0, "num_token_doc": 66.7454, "num_token_overlap": 11.6604, "num_token_query": 31.3333, "num_token_union": 65.1256, "num_word_context": 202.4033, "num_word_doc": 49.7992, "num_word_query": 23.2922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4561.1432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.561, "query_norm": 1.3908, "queue_k_norm": 1.5283, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3333, "sent_len_1": 66.7454, "sent_len_max_0": 127.4775, "sent_len_max_1": 188.9325, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.7085, "doc_norm": 1.5266, "encoder_q-embeddings": 9860.3066, "encoder_q-layer.0": 6903.4673, "encoder_q-layer.1": 5869.7305, "encoder_q-layer.10": 2550.9836, "encoder_q-layer.11": 6323.4805, "encoder_q-layer.2": 4781.3887, "encoder_q-layer.3": 5011.6201, "encoder_q-layer.4": 5149.5894, "encoder_q-layer.5": 4945.5874, "encoder_q-layer.6": 4619.6157, "encoder_q-layer.7": 4118.1357, "encoder_q-layer.8": 3073.0742, "encoder_q-layer.9": 2463.5825, "epoch": 0.65, "inbatch_neg_score": 0.5616, "inbatch_pos_score": 1.1357, "learning_rate": 5.000000000000001e-07, "loss": 3.7085, "norm_diff": 0.1307, "norm_loss": 0.0, "num_token_doc": 66.8422, "num_token_overlap": 11.5663, "num_token_query": 31.2078, "num_token_union": 65.1427, "num_word_context": 202.2872, "num_word_doc": 49.8708, "num_word_query": 23.1435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8918.379, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5615, "query_norm": 1.3959, "queue_k_norm": 1.5296, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.2078, "sent_len_1": 66.8422, "sent_len_max_0": 127.4125, "sent_len_max_1": 190.2138, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.692, "doc_norm": 1.529, "encoder_q-embeddings": 3229.6658, "encoder_q-layer.0": 2163.2939, "encoder_q-layer.1": 2389.5764, "encoder_q-layer.10": 2777.6931, "encoder_q-layer.11": 6599.2222, "encoder_q-layer.2": 2688.8943, "encoder_q-layer.3": 2858.8599, "encoder_q-layer.4": 3022.0898, "encoder_q-layer.5": 3085.9026, "encoder_q-layer.6": 3417.1794, "encoder_q-layer.7": 3537.3237, "encoder_q-layer.8": 3314.6187, "encoder_q-layer.9": 2502.7542, "epoch": 0.65, "inbatch_neg_score": 0.5616, "inbatch_pos_score": 1.1553, "learning_rate": 4.444444444444445e-07, "loss": 3.692, "norm_diff": 0.1324, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 11.7199, "num_token_query": 31.5114, "num_token_union": 65.1671, "num_word_context": 202.3479, "num_word_doc": 49.8242, "num_word_query": 23.4061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5258.9181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.3965, "queue_k_norm": 1.5304, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.5114, "sent_len_1": 66.7492, "sent_len_max_0": 127.5, "sent_len_max_1": 188.715, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6835, "doc_norm": 1.5359, "encoder_q-embeddings": 1999.2535, "encoder_q-layer.0": 1372.8213, "encoder_q-layer.1": 1438.783, "encoder_q-layer.10": 2453.7029, "encoder_q-layer.11": 6189.7812, "encoder_q-layer.2": 1601.172, "encoder_q-layer.3": 1611.6956, "encoder_q-layer.4": 1666.3519, "encoder_q-layer.5": 1732.6263, "encoder_q-layer.6": 1885.3494, "encoder_q-layer.7": 2064.48, "encoder_q-layer.8": 2393.2864, "encoder_q-layer.9": 2270.5879, "epoch": 0.65, "inbatch_neg_score": 0.5602, "inbatch_pos_score": 1.1465, "learning_rate": 3.888888888888889e-07, "loss": 3.6835, "norm_diff": 0.1452, "norm_loss": 0.0, "num_token_doc": 66.6224, "num_token_overlap": 11.7653, "num_token_query": 31.6936, "num_token_union": 65.1299, "num_word_context": 202.3004, "num_word_doc": 49.6875, "num_word_query": 23.5661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3918.616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.561, "query_norm": 1.3907, "queue_k_norm": 1.5295, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.6936, "sent_len_1": 66.6224, "sent_len_max_0": 127.6875, "sent_len_max_1": 188.4675, "stdk": 0.0485, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7047, "doc_norm": 1.5305, "encoder_q-embeddings": 75913.9766, "encoder_q-layer.0": 55433.4609, "encoder_q-layer.1": 63012.0898, "encoder_q-layer.10": 2888.7341, "encoder_q-layer.11": 6519.0093, "encoder_q-layer.2": 74229.1719, "encoder_q-layer.3": 77965.6953, "encoder_q-layer.4": 84177.5547, "encoder_q-layer.5": 74692.25, "encoder_q-layer.6": 58816.4219, "encoder_q-layer.7": 43079.8672, "encoder_q-layer.8": 26244.1426, "encoder_q-layer.9": 10592.7314, "epoch": 0.65, "inbatch_neg_score": 0.5607, "inbatch_pos_score": 1.1494, "learning_rate": 3.3333333333333335e-07, "loss": 3.7047, "norm_diff": 0.1342, "norm_loss": 0.0, "num_token_doc": 66.6644, "num_token_overlap": 11.6586, "num_token_query": 31.2873, "num_token_union": 64.9523, "num_word_context": 202.126, "num_word_doc": 49.7519, "num_word_query": 23.2432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 88568.3307, "preclip_grad_norm_avg": 0.0008, "q@queue_neg_score": 0.5615, "query_norm": 1.3963, "queue_k_norm": 1.5279, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.2873, "sent_len_1": 66.6644, "sent_len_max_0": 127.4375, "sent_len_max_1": 189.0925, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.719, "doc_norm": 1.522, "encoder_q-embeddings": 2602.8364, "encoder_q-layer.0": 1784.4302, "encoder_q-layer.1": 2096.4924, "encoder_q-layer.10": 2740.0156, "encoder_q-layer.11": 6520.6514, "encoder_q-layer.2": 2389.8701, "encoder_q-layer.3": 2453.7561, "encoder_q-layer.4": 2649.6125, "encoder_q-layer.5": 2813.0129, "encoder_q-layer.6": 2829.2451, "encoder_q-layer.7": 2679.0303, "encoder_q-layer.8": 2707.7334, "encoder_q-layer.9": 2487.3997, "epoch": 0.65, "inbatch_neg_score": 0.561, "inbatch_pos_score": 1.1289, "learning_rate": 2.777777777777778e-07, "loss": 3.719, "norm_diff": 0.1358, "norm_loss": 0.0, "num_token_doc": 66.8066, "num_token_overlap": 11.6331, "num_token_query": 31.1766, "num_token_union": 65.0471, "num_word_context": 202.4129, "num_word_doc": 49.8005, "num_word_query": 23.1178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4743.8345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.3862, "queue_k_norm": 1.5281, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.1766, "sent_len_1": 66.8066, "sent_len_max_0": 127.4488, "sent_len_max_1": 188.9787, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.7144, "doc_norm": 1.5346, "encoder_q-embeddings": 2590.9819, "encoder_q-layer.0": 1742.1066, "encoder_q-layer.1": 1888.8641, "encoder_q-layer.10": 2549.458, "encoder_q-layer.11": 6295.5703, "encoder_q-layer.2": 2189.8345, "encoder_q-layer.3": 2200.2258, "encoder_q-layer.4": 2286.3848, "encoder_q-layer.5": 2456.8901, "encoder_q-layer.6": 2249.873, "encoder_q-layer.7": 2290.2214, "encoder_q-layer.8": 2549.8237, "encoder_q-layer.9": 2308.0769, "epoch": 0.65, "inbatch_neg_score": 0.5615, "inbatch_pos_score": 1.1445, "learning_rate": 2.2222222222222224e-07, "loss": 3.7144, "norm_diff": 0.1387, "norm_loss": 0.0, "num_token_doc": 66.6661, "num_token_overlap": 11.6528, "num_token_query": 31.3817, "num_token_union": 65.0826, "num_word_context": 202.239, "num_word_doc": 49.7122, "num_word_query": 23.32, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4408.2996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3959, "queue_k_norm": 1.5287, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.3817, "sent_len_1": 66.6661, "sent_len_max_0": 127.5625, "sent_len_max_1": 190.3688, "stdk": 0.0484, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.6971, "doc_norm": 1.522, "encoder_q-embeddings": 2504.0256, "encoder_q-layer.0": 1721.837, "encoder_q-layer.1": 1998.6158, "encoder_q-layer.10": 2551.0474, "encoder_q-layer.11": 6855.1265, "encoder_q-layer.2": 2391.2925, "encoder_q-layer.3": 2329.519, "encoder_q-layer.4": 2220.6946, "encoder_q-layer.5": 2090.6577, "encoder_q-layer.6": 2181.7456, "encoder_q-layer.7": 2286.136, "encoder_q-layer.8": 2588.2363, "encoder_q-layer.9": 2391.9148, "epoch": 0.65, "inbatch_neg_score": 0.5624, "inbatch_pos_score": 1.1289, "learning_rate": 1.6666666666666668e-07, "loss": 3.6971, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.8469, "num_token_overlap": 11.6732, "num_token_query": 31.4099, "num_token_union": 65.1864, "num_word_context": 202.3541, "num_word_doc": 49.9091, "num_word_query": 23.3355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4595.3318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.3915, "queue_k_norm": 1.5268, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.4099, "sent_len_1": 66.8469, "sent_len_max_0": 127.31, "sent_len_max_1": 189.615, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.7051, "doc_norm": 1.5271, "encoder_q-embeddings": 5521.5557, "encoder_q-layer.0": 3842.6328, "encoder_q-layer.1": 4017.9688, "encoder_q-layer.10": 5775.5381, "encoder_q-layer.11": 13893.9551, "encoder_q-layer.2": 4577.6001, "encoder_q-layer.3": 4530.8027, "encoder_q-layer.4": 5218.1265, "encoder_q-layer.5": 4703.3574, "encoder_q-layer.6": 4394.209, "encoder_q-layer.7": 4741.9038, "encoder_q-layer.8": 5348.1782, "encoder_q-layer.9": 5087.71, "epoch": 0.65, "inbatch_neg_score": 0.5617, "inbatch_pos_score": 1.1484, "learning_rate": 1.1111111111111112e-07, "loss": 3.7051, "norm_diff": 0.1289, "norm_loss": 0.0, "num_token_doc": 66.4848, "num_token_overlap": 11.6402, "num_token_query": 31.3433, "num_token_union": 64.9682, "num_word_context": 201.9179, "num_word_doc": 49.6411, "num_word_query": 23.284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9494.1425, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5625, "query_norm": 1.3981, "queue_k_norm": 1.5292, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.3433, "sent_len_1": 66.4848, "sent_len_max_0": 127.5475, "sent_len_max_1": 186.66, "stdk": 0.0481, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.6941, "doc_norm": 1.5317, "encoder_q-embeddings": 4463.3384, "encoder_q-layer.0": 2953.3455, "encoder_q-layer.1": 3151.3621, "encoder_q-layer.10": 5357.9717, "encoder_q-layer.11": 14302.1562, "encoder_q-layer.2": 3601.167, "encoder_q-layer.3": 3651.9666, "encoder_q-layer.4": 3773.8218, "encoder_q-layer.5": 3952.3447, "encoder_q-layer.6": 4334.5566, "encoder_q-layer.7": 4827.6465, "encoder_q-layer.8": 5219.6069, "encoder_q-layer.9": 5113.6606, "epoch": 0.65, "inbatch_neg_score": 0.5606, "inbatch_pos_score": 1.1084, "learning_rate": 5.555555555555556e-08, "loss": 3.6941, "norm_diff": 0.1424, "norm_loss": 0.0, "num_token_doc": 66.8603, "num_token_overlap": 11.6897, "num_token_query": 31.4205, "num_token_union": 65.2072, "num_word_context": 202.3702, "num_word_doc": 49.886, "num_word_query": 23.3453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9093.5227, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.561, "query_norm": 1.3893, "queue_k_norm": 1.5282, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.4205, "sent_len_1": 66.8603, "sent_len_max_0": 127.3838, "sent_len_max_1": 189.3162, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.6998, "doc_norm": 1.5333, "encoder_q-embeddings": 4746.6353, "encoder_q-layer.0": 3203.8914, "encoder_q-layer.1": 3312.9883, "encoder_q-layer.10": 5345.9707, "encoder_q-layer.11": 13415.2529, "encoder_q-layer.2": 3643.4302, "encoder_q-layer.3": 3654.8335, "encoder_q-layer.4": 3712.9622, "encoder_q-layer.5": 3694.9253, "encoder_q-layer.6": 4045.8359, "encoder_q-layer.7": 4457.5757, "encoder_q-layer.8": 5143.9668, "encoder_q-layer.9": 4653.582, "epoch": 0.65, "inbatch_neg_score": 0.5638, "inbatch_pos_score": 1.1299, "learning_rate": 0.0, "loss": 3.6998, "norm_diff": 0.1471, "norm_loss": 0.0, "num_token_doc": 66.6725, "num_token_overlap": 11.6995, "num_token_query": 31.4759, "num_token_union": 65.0891, "num_word_context": 202.0858, "num_word_doc": 49.7334, "num_word_query": 23.3757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8644.9043, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5635, "query_norm": 1.3862, "queue_k_norm": 1.5288, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.4759, "sent_len_1": 66.6725, "sent_len_max_0": 127.4325, "sent_len_max_1": 191.4125, "stdk": 0.0483, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 29.3658, "dev_samples_per_second": 2.179, "dev_steps_per_second": 0.034, "epoch": 0.65, "step": 100000, "test_accuracy": 93.37158203125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3892047703266144, "test_doc_norm": 1.49075448513031, "test_inbatch_neg_score": 0.8894694447517395, "test_inbatch_pos_score": 1.788484811782837, "test_loss": 0.3892047703266144, "test_loss_align": 0.8977279663085938, "test_loss_unif": 3.4025321006774902, "test_loss_unif_q@queue": 3.4025321006774902, "test_norm_diff": 0.007765868678689003, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5488045811653137, "test_query_norm": 1.491594910621643, "test_queue_k_norm": 1.529219627380371, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041528571397066116, "test_stdq": 0.041504982858896255, "test_stdqueue_k": 0.04827713593840599, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.3658, "dev_samples_per_second": 2.179, "dev_steps_per_second": 0.034, "epoch": 0.65, "eval_beir-arguana_ndcg@10": 0.39248, "eval_beir-arguana_recall@10": 0.66643, "eval_beir-arguana_recall@100": 0.95021, "eval_beir-arguana_recall@20": 0.79801, "eval_beir-avg_ndcg@10": 0.38176791666666665, "eval_beir-avg_recall@10": 0.45404824999999993, "eval_beir-avg_recall@100": 0.6408104166666666, "eval_beir-avg_recall@20": 0.51861675, "eval_beir-cqadupstack_ndcg@10": 0.26470916666666666, "eval_beir-cqadupstack_recall@10": 0.3629725000000001, "eval_beir-cqadupstack_recall@100": 0.5973441666666667, "eval_beir-cqadupstack_recall@20": 0.43310750000000003, "eval_beir-fiqa_ndcg@10": 0.25066, "eval_beir-fiqa_recall@10": 0.30946, "eval_beir-fiqa_recall@100": 0.59031, "eval_beir-fiqa_recall@20": 0.39366, "eval_beir-nfcorpus_ndcg@10": 0.30476, "eval_beir-nfcorpus_recall@10": 0.14819, "eval_beir-nfcorpus_recall@100": 0.29111, "eval_beir-nfcorpus_recall@20": 0.18405, "eval_beir-nq_ndcg@10": 0.28393, "eval_beir-nq_recall@10": 0.46526, "eval_beir-nq_recall@100": 0.80939, "eval_beir-nq_recall@20": 0.58679, "eval_beir-quora_ndcg@10": 0.77443, "eval_beir-quora_recall@10": 0.88528, "eval_beir-quora_recall@100": 0.97659, "eval_beir-quora_recall@20": 0.92734, "eval_beir-scidocs_ndcg@10": 0.15597, "eval_beir-scidocs_recall@10": 0.16583, "eval_beir-scidocs_recall@100": 0.37012, "eval_beir-scidocs_recall@20": 0.22418, "eval_beir-scifact_ndcg@10": 0.63329, "eval_beir-scifact_recall@10": 0.78622, "eval_beir-scifact_recall@100": 0.91711, "eval_beir-scifact_recall@20": 0.843, "eval_beir-trec-covid_ndcg@10": 0.57965, "eval_beir-trec-covid_recall@10": 0.628, "eval_beir-trec-covid_recall@100": 0.4656, "eval_beir-trec-covid_recall@20": 0.587, "eval_beir-webis-touche2020_ndcg@10": 0.1778, "eval_beir-webis-touche2020_recall@10": 0.12284, "eval_beir-webis-touche2020_recall@100": 0.44032, "eval_beir-webis-touche2020_recall@20": 0.20903, "eval_senteval-avg_sts": 0.7582930722284593, "eval_senteval-sickr_spearman": 0.7241117459531325, "eval_senteval-stsb_spearman": 0.792474398503786, "step": 100000, "test_accuracy": 93.37158203125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3892047703266144, "test_doc_norm": 1.49075448513031, "test_inbatch_neg_score": 0.8894694447517395, "test_inbatch_pos_score": 1.788484811782837, "test_loss": 0.3892047703266144, "test_loss_align": 0.8977279663085938, "test_loss_unif": 3.4025321006774902, "test_loss_unif_q@queue": 3.4025321006774902, "test_norm_diff": 0.007765868678689003, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5488045811653137, "test_query_norm": 1.491594910621643, "test_queue_k_norm": 1.529219627380371, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041528571397066116, "test_stdq": 0.041504982858896255, "test_stdqueue_k": 0.04827713593840599, "test_stdqueue_q": 0.0 }, { "epoch": 0.65, "step": 100000, "total_flos": 0, "train_runtime": 79837.2561, "train_samples_per_second": 1.253 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }