| { |
| "best_global_step": 490, |
| "best_metric": 0.38254301379946687, |
| "best_model_checkpoint": "nomic-ai/modernbert-embed-base/checkpoint-490", |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 588, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01020408163265306, |
| "grad_norm": 97.22399139404297, |
| "learning_rate": 0.0, |
| "loss": 4.0461, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02040816326530612, |
| "grad_norm": 201.0540313720703, |
| "learning_rate": 1.0204081632653061e-07, |
| "loss": 7.4174, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.030612244897959183, |
| "grad_norm": 114.41702270507812, |
| "learning_rate": 2.0408163265306121e-07, |
| "loss": 4.0528, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.04081632653061224, |
| "grad_norm": 74.34385681152344, |
| "learning_rate": 3.0612244897959183e-07, |
| "loss": 2.6554, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.05102040816326531, |
| "grad_norm": 26.128253936767578, |
| "learning_rate": 4.0816326530612243e-07, |
| "loss": 0.5018, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.061224489795918366, |
| "grad_norm": 55.006412506103516, |
| "learning_rate": 5.102040816326531e-07, |
| "loss": 0.7805, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 119.91863250732422, |
| "learning_rate": 6.122448979591837e-07, |
| "loss": 2.9274, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.08163265306122448, |
| "grad_norm": 147.19371032714844, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 4.5888, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.09183673469387756, |
| "grad_norm": 173.107177734375, |
| "learning_rate": 8.163265306122449e-07, |
| "loss": 2.5851, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.10204081632653061, |
| "grad_norm": 16.112817764282227, |
| "learning_rate": 9.183673469387756e-07, |
| "loss": 0.4261, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11224489795918367, |
| "grad_norm": 44.18981170654297, |
| "learning_rate": 1.0204081632653063e-06, |
| "loss": 0.6066, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.12244897959183673, |
| "grad_norm": 40.847877502441406, |
| "learning_rate": 1.122448979591837e-06, |
| "loss": 1.5421, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1326530612244898, |
| "grad_norm": 22.00406265258789, |
| "learning_rate": 1.2244897959183673e-06, |
| "loss": 0.5044, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 90.59610748291016, |
| "learning_rate": 1.3265306122448982e-06, |
| "loss": 1.6806, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.15306122448979592, |
| "grad_norm": 82.69358825683594, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.8214, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16326530612244897, |
| "grad_norm": 83.0409927368164, |
| "learning_rate": 1.5306122448979593e-06, |
| "loss": 2.6111, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.17346938775510204, |
| "grad_norm": 187.79660034179688, |
| "learning_rate": 1.6326530612244897e-06, |
| "loss": 8.3034, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1836734693877551, |
| "grad_norm": 27.019817352294922, |
| "learning_rate": 1.7346938775510206e-06, |
| "loss": 0.5837, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.19387755102040816, |
| "grad_norm": 57.62622833251953, |
| "learning_rate": 1.8367346938775512e-06, |
| "loss": 2.4009, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 30.775474548339844, |
| "learning_rate": 1.938775510204082e-06, |
| "loss": 0.8685, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 184.99273681640625, |
| "learning_rate": 2.0408163265306125e-06, |
| "loss": 3.1922, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.22448979591836735, |
| "grad_norm": 58.83700180053711, |
| "learning_rate": 2.1428571428571427e-06, |
| "loss": 4.7617, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.23469387755102042, |
| "grad_norm": 61.65654373168945, |
| "learning_rate": 2.244897959183674e-06, |
| "loss": 1.962, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.24489795918367346, |
| "grad_norm": 164.40609741210938, |
| "learning_rate": 2.3469387755102044e-06, |
| "loss": 7.5857, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.25510204081632654, |
| "grad_norm": 5.837972640991211, |
| "learning_rate": 2.4489795918367347e-06, |
| "loss": 0.1287, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2653061224489796, |
| "grad_norm": 82.78257751464844, |
| "learning_rate": 2.5510204081632657e-06, |
| "loss": 3.0167, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2755102040816326, |
| "grad_norm": 62.918235778808594, |
| "learning_rate": 2.6530612244897964e-06, |
| "loss": 3.8032, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 118.64309692382812, |
| "learning_rate": 2.7551020408163266e-06, |
| "loss": 3.8445, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.29591836734693877, |
| "grad_norm": 103.83515167236328, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 1.6414, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.30612244897959184, |
| "grad_norm": 162.89938354492188, |
| "learning_rate": 2.959183673469388e-06, |
| "loss": 6.3828, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3163265306122449, |
| "grad_norm": 93.17887115478516, |
| "learning_rate": 3.0612244897959185e-06, |
| "loss": 3.1969, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.32653061224489793, |
| "grad_norm": 35.798160552978516, |
| "learning_rate": 3.1632653061224496e-06, |
| "loss": 0.7605, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.336734693877551, |
| "grad_norm": 91.22254943847656, |
| "learning_rate": 3.2653061224489794e-06, |
| "loss": 5.0711, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3469387755102041, |
| "grad_norm": 147.5310821533203, |
| "learning_rate": 3.3673469387755105e-06, |
| "loss": 2.6523, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 20.746273040771484, |
| "learning_rate": 3.469387755102041e-06, |
| "loss": 0.4005, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3673469387755102, |
| "grad_norm": 43.79488754272461, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.757, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.37755102040816324, |
| "grad_norm": 133.88282775878906, |
| "learning_rate": 3.6734693877551024e-06, |
| "loss": 3.1397, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3877551020408163, |
| "grad_norm": 203.5764617919922, |
| "learning_rate": 3.7755102040816327e-06, |
| "loss": 3.6261, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3979591836734694, |
| "grad_norm": 80.596923828125, |
| "learning_rate": 3.877551020408164e-06, |
| "loss": 2.7427, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 15.089832305908203, |
| "learning_rate": 3.979591836734694e-06, |
| "loss": 0.4561, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.41836734693877553, |
| "grad_norm": 1.917051076889038, |
| "learning_rate": 4.081632653061225e-06, |
| "loss": 0.0331, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 126.42117309570312, |
| "learning_rate": 4.183673469387755e-06, |
| "loss": 5.1981, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.4387755102040816, |
| "grad_norm": 19.87098503112793, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 0.5115, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.4489795918367347, |
| "grad_norm": 38.2932014465332, |
| "learning_rate": 4.3877551020408165e-06, |
| "loss": 1.119, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.45918367346938777, |
| "grad_norm": 105.88655090332031, |
| "learning_rate": 4.489795918367348e-06, |
| "loss": 1.8869, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.46938775510204084, |
| "grad_norm": 77.14546203613281, |
| "learning_rate": 4.591836734693878e-06, |
| "loss": 2.7846, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.47959183673469385, |
| "grad_norm": 85.57715606689453, |
| "learning_rate": 4.693877551020409e-06, |
| "loss": 2.4171, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.4897959183673469, |
| "grad_norm": 58.05616760253906, |
| "learning_rate": 4.795918367346939e-06, |
| "loss": 2.6935, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 44.124664306640625, |
| "learning_rate": 4.897959183673469e-06, |
| "loss": 1.0925, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.5102040816326531, |
| "grad_norm": 73.12091827392578, |
| "learning_rate": 5e-06, |
| "loss": 2.0241, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5204081632653061, |
| "grad_norm": 185.66015625, |
| "learning_rate": 5.1020408163265315e-06, |
| "loss": 7.4609, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.5306122448979592, |
| "grad_norm": 93.33148193359375, |
| "learning_rate": 5.204081632653062e-06, |
| "loss": 3.2983, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.5408163265306123, |
| "grad_norm": 77.21890258789062, |
| "learning_rate": 5.306122448979593e-06, |
| "loss": 3.8886, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.5510204081632653, |
| "grad_norm": 39.11575698852539, |
| "learning_rate": 5.408163265306123e-06, |
| "loss": 0.5936, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5612244897959183, |
| "grad_norm": 48.75920867919922, |
| "learning_rate": 5.510204081632653e-06, |
| "loss": 0.8204, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 9.229560852050781, |
| "learning_rate": 5.6122448979591834e-06, |
| "loss": 0.1836, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5816326530612245, |
| "grad_norm": 52.3462028503418, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.4946, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5918367346938775, |
| "grad_norm": 30.56440544128418, |
| "learning_rate": 5.816326530612246e-06, |
| "loss": 0.2755, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.6020408163265306, |
| "grad_norm": 13.171940803527832, |
| "learning_rate": 5.918367346938776e-06, |
| "loss": 0.1641, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 55.642364501953125, |
| "learning_rate": 6.020408163265307e-06, |
| "loss": 1.2537, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6224489795918368, |
| "grad_norm": 67.12672424316406, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 4.3895, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.6326530612244898, |
| "grad_norm": 133.45712280273438, |
| "learning_rate": 6.224489795918368e-06, |
| "loss": 3.2041, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 115.94568634033203, |
| "learning_rate": 6.326530612244899e-06, |
| "loss": 3.2087, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.6530612244897959, |
| "grad_norm": 196.57113647460938, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 8.0364, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.6632653061224489, |
| "grad_norm": 43.574398040771484, |
| "learning_rate": 6.530612244897959e-06, |
| "loss": 0.7748, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.673469387755102, |
| "grad_norm": 133.8882293701172, |
| "learning_rate": 6.63265306122449e-06, |
| "loss": 4.7505, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.6836734693877551, |
| "grad_norm": 35.49979019165039, |
| "learning_rate": 6.734693877551021e-06, |
| "loss": 2.2919, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6938775510204082, |
| "grad_norm": 56.431461334228516, |
| "learning_rate": 6.836734693877551e-06, |
| "loss": 0.6432, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.7040816326530612, |
| "grad_norm": 44.0866813659668, |
| "learning_rate": 6.938775510204082e-06, |
| "loss": 0.97, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 156.4910125732422, |
| "learning_rate": 7.0408163265306125e-06, |
| "loss": 4.787, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7244897959183674, |
| "grad_norm": 339.540771484375, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 2.6329, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.7346938775510204, |
| "grad_norm": 80.52706146240234, |
| "learning_rate": 7.244897959183675e-06, |
| "loss": 1.2897, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.7448979591836735, |
| "grad_norm": 115.99695587158203, |
| "learning_rate": 7.346938775510205e-06, |
| "loss": 2.2093, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.7551020408163265, |
| "grad_norm": 59.96979904174805, |
| "learning_rate": 7.448979591836736e-06, |
| "loss": 1.7263, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.7653061224489796, |
| "grad_norm": 40.83548355102539, |
| "learning_rate": 7.551020408163265e-06, |
| "loss": 0.9284, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.7755102040816326, |
| "grad_norm": 14.140007019042969, |
| "learning_rate": 7.653061224489796e-06, |
| "loss": 0.2508, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.5128963589668274, |
| "learning_rate": 7.755102040816327e-06, |
| "loss": 0.0072, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7959183673469388, |
| "grad_norm": 10.41791820526123, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 0.1753, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.8061224489795918, |
| "grad_norm": 30.593122482299805, |
| "learning_rate": 7.959183673469388e-06, |
| "loss": 1.2562, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 7.927923679351807, |
| "learning_rate": 8.06122448979592e-06, |
| "loss": 0.1105, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.826530612244898, |
| "grad_norm": 156.017333984375, |
| "learning_rate": 8.16326530612245e-06, |
| "loss": 4.0241, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.8367346938775511, |
| "grad_norm": 116.1068344116211, |
| "learning_rate": 8.26530612244898e-06, |
| "loss": 1.655, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.8469387755102041, |
| "grad_norm": 3.5885918140411377, |
| "learning_rate": 8.36734693877551e-06, |
| "loss": 0.0406, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.3218710422515869, |
| "learning_rate": 8.469387755102042e-06, |
| "loss": 0.0033, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.8673469387755102, |
| "grad_norm": 156.8653564453125, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 3.2183, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.8775510204081632, |
| "grad_norm": 19.75959587097168, |
| "learning_rate": 8.673469387755103e-06, |
| "loss": 0.1812, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.8877551020408163, |
| "grad_norm": 9.409197807312012, |
| "learning_rate": 8.775510204081633e-06, |
| "loss": 0.2222, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8979591836734694, |
| "grad_norm": 44.61834716796875, |
| "learning_rate": 8.877551020408163e-06, |
| "loss": 0.6726, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.9081632653061225, |
| "grad_norm": 98.52664947509766, |
| "learning_rate": 8.979591836734695e-06, |
| "loss": 3.5891, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.9183673469387755, |
| "grad_norm": 27.778385162353516, |
| "learning_rate": 9.081632653061225e-06, |
| "loss": 0.3833, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 1.6346553564071655, |
| "learning_rate": 9.183673469387756e-06, |
| "loss": 0.0257, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.9387755102040817, |
| "grad_norm": 176.21873474121094, |
| "learning_rate": 9.285714285714288e-06, |
| "loss": 4.635, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.9489795918367347, |
| "grad_norm": 93.562255859375, |
| "learning_rate": 9.387755102040818e-06, |
| "loss": 2.1625, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.9591836734693877, |
| "grad_norm": 20.919546127319336, |
| "learning_rate": 9.489795918367348e-06, |
| "loss": 0.3742, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.9693877551020408, |
| "grad_norm": 8.895219802856445, |
| "learning_rate": 9.591836734693878e-06, |
| "loss": 0.1946, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.9795918367346939, |
| "grad_norm": 17.501354217529297, |
| "learning_rate": 9.693877551020408e-06, |
| "loss": 0.2705, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.9897959183673469, |
| "grad_norm": 226.07272338867188, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 12.4745, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 81.55729675292969, |
| "learning_rate": 9.89795918367347e-06, |
| "loss": 1.718, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_dim_128_cosine_accuracy@1": 0.32714468629961585, |
| "eval_dim_128_cosine_accuracy@10": 0.4014084507042254, |
| "eval_dim_128_cosine_accuracy@3": 0.33098591549295775, |
| "eval_dim_128_cosine_accuracy@5": 0.36619718309859156, |
| "eval_dim_128_cosine_map@100": 0.41817891915077654, |
| "eval_dim_128_cosine_mrr@10": 0.33965281588520974, |
| "eval_dim_128_cosine_ndcg@10": 0.35251068430131405, |
| "eval_dim_128_cosine_precision@1": 0.32714468629961585, |
| "eval_dim_128_cosine_precision@10": 0.293213828425096, |
| "eval_dim_128_cosine_precision@3": 0.3260776781903542, |
| "eval_dim_128_cosine_precision@5": 0.3192061459667094, |
| "eval_dim_128_cosine_recall@1": 0.038265678793809856, |
| "eval_dim_128_cosine_recall@10": 0.2562091364663566, |
| "eval_dim_128_cosine_recall@3": 0.11220810535939996, |
| "eval_dim_128_cosine_recall@5": 0.17048689772564513, |
| "eval_dim_256_cosine_accuracy@1": 0.3700384122919334, |
| "eval_dim_256_cosine_accuracy@10": 0.44622279129321385, |
| "eval_dim_256_cosine_accuracy@3": 0.3758002560819462, |
| "eval_dim_256_cosine_accuracy@5": 0.4058898847631242, |
| "eval_dim_256_cosine_map@100": 0.45936107144286625, |
| "eval_dim_256_cosine_mrr@10": 0.3829908745401701, |
| "eval_dim_256_cosine_ndcg@10": 0.3963587963422467, |
| "eval_dim_256_cosine_precision@1": 0.3700384122919334, |
| "eval_dim_256_cosine_precision@10": 0.33565941101152363, |
| "eval_dim_256_cosine_precision@3": 0.36982501067008117, |
| "eval_dim_256_cosine_precision@5": 0.36274007682458387, |
| "eval_dim_256_cosine_recall@1": 0.04011382918503575, |
| "eval_dim_256_cosine_recall@10": 0.2707413822992358, |
| "eval_dim_256_cosine_recall@3": 0.11768720228638535, |
| "eval_dim_256_cosine_recall@5": 0.17899109052239595, |
| "eval_dim_512_cosine_accuracy@1": 0.39308578745198464, |
| "eval_dim_512_cosine_accuracy@10": 0.4564660691421255, |
| "eval_dim_512_cosine_accuracy@3": 0.3975672215108835, |
| "eval_dim_512_cosine_accuracy@5": 0.42509603072983354, |
| "eval_dim_512_cosine_map@100": 0.47901702829368287, |
| "eval_dim_512_cosine_mrr@10": 0.4040218584232665, |
| "eval_dim_512_cosine_ndcg@10": 0.4158045483689078, |
| "eval_dim_512_cosine_precision@1": 0.39308578745198464, |
| "eval_dim_512_cosine_precision@10": 0.3489756722151088, |
| "eval_dim_512_cosine_precision@3": 0.3922321809645753, |
| "eval_dim_512_cosine_precision@5": 0.382842509603073, |
| "eval_dim_512_cosine_recall@1": 0.04316243704465245, |
| "eval_dim_512_cosine_recall@10": 0.2842422386776242, |
| "eval_dim_512_cosine_recall@3": 0.126195512947793, |
| "eval_dim_512_cosine_recall@5": 0.1909624031231601, |
| "eval_dim_64_cosine_accuracy@1": 0.2855313700384123, |
| "eval_dim_64_cosine_accuracy@10": 0.3559539052496799, |
| "eval_dim_64_cosine_accuracy@3": 0.29257362355953903, |
| "eval_dim_64_cosine_accuracy@5": 0.323303457106274, |
| "eval_dim_64_cosine_map@100": 0.37060921311190964, |
| "eval_dim_64_cosine_mrr@10": 0.2980323862366112, |
| "eval_dim_64_cosine_ndcg@10": 0.31079967856179397, |
| "eval_dim_64_cosine_precision@1": 0.2855313700384123, |
| "eval_dim_64_cosine_precision@10": 0.258258642765685, |
| "eval_dim_64_cosine_precision@3": 0.2861715749039693, |
| "eval_dim_64_cosine_precision@5": 0.28079385403329066, |
| "eval_dim_64_cosine_recall@1": 0.03346436650150152, |
| "eval_dim_64_cosine_recall@10": 0.22602132535917333, |
| "eval_dim_64_cosine_recall@3": 0.09850444385264467, |
| "eval_dim_64_cosine_recall@5": 0.14977503796339806, |
| "eval_dim_768_cosine_accuracy@1": 0.3975672215108835, |
| "eval_dim_768_cosine_accuracy@10": 0.47439180537772085, |
| "eval_dim_768_cosine_accuracy@3": 0.4026888604353393, |
| "eval_dim_768_cosine_accuracy@5": 0.4359795134443022, |
| "eval_dim_768_cosine_map@100": 0.48939446674087983, |
| "eval_dim_768_cosine_mrr@10": 0.41065941101152337, |
| "eval_dim_768_cosine_ndcg@10": 0.42417702900730875, |
| "eval_dim_768_cosine_precision@1": 0.3975672215108835, |
| "eval_dim_768_cosine_precision@10": 0.35678617157490394, |
| "eval_dim_768_cosine_precision@3": 0.3969270166453265, |
| "eval_dim_768_cosine_precision@5": 0.3884763124199744, |
| "eval_dim_768_cosine_recall@1": 0.044071996649064187, |
| "eval_dim_768_cosine_recall@10": 0.29232434166264154, |
| "eval_dim_768_cosine_recall@3": 0.12908414526605214, |
| "eval_dim_768_cosine_recall@5": 0.19569710007543972, |
| "eval_runtime": 186.9674, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.31079967856179397, |
| "eval_steps_per_second": 0.0, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.010204081632653, |
| "grad_norm": 145.2915496826172, |
| "learning_rate": 1e-05, |
| "loss": 5.4827, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 188.6737060546875, |
| "learning_rate": 1.0102040816326531e-05, |
| "loss": 7.4285, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.030612244897959, |
| "grad_norm": 90.18331146240234, |
| "learning_rate": 1.0204081632653063e-05, |
| "loss": 2.6083, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.0408163265306123, |
| "grad_norm": 48.41734313964844, |
| "learning_rate": 1.0306122448979591e-05, |
| "loss": 0.2821, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.0510204081632653, |
| "grad_norm": 11.53239631652832, |
| "learning_rate": 1.0408163265306123e-05, |
| "loss": 0.2032, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.0612244897959184, |
| "grad_norm": 15.895277976989746, |
| "learning_rate": 1.0510204081632654e-05, |
| "loss": 0.2603, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 8.3611421585083, |
| "learning_rate": 1.0612244897959186e-05, |
| "loss": 0.0869, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.0816326530612246, |
| "grad_norm": 3.729483127593994, |
| "learning_rate": 1.0714285714285714e-05, |
| "loss": 0.0194, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.0918367346938775, |
| "grad_norm": 0.609831690788269, |
| "learning_rate": 1.0816326530612246e-05, |
| "loss": 0.0118, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.1020408163265305, |
| "grad_norm": 102.30455780029297, |
| "learning_rate": 1.0918367346938776e-05, |
| "loss": 3.5743, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.1122448979591837, |
| "grad_norm": 59.90740966796875, |
| "learning_rate": 1.1020408163265306e-05, |
| "loss": 0.5869, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.1224489795918366, |
| "grad_norm": 1.5409817695617676, |
| "learning_rate": 1.1122448979591838e-05, |
| "loss": 0.0305, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1326530612244898, |
| "grad_norm": 24.44378662109375, |
| "learning_rate": 1.1224489795918367e-05, |
| "loss": 0.4096, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 77.41841888427734, |
| "learning_rate": 1.1326530612244899e-05, |
| "loss": 2.2927, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.153061224489796, |
| "grad_norm": 56.17146682739258, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 1.5007, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.163265306122449, |
| "grad_norm": 59.88473129272461, |
| "learning_rate": 1.1530612244897961e-05, |
| "loss": 1.2148, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.1734693877551021, |
| "grad_norm": 0.20367565751075745, |
| "learning_rate": 1.1632653061224491e-05, |
| "loss": 0.0026, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.183673469387755, |
| "grad_norm": 38.7552490234375, |
| "learning_rate": 1.1734693877551021e-05, |
| "loss": 0.4087, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.193877551020408, |
| "grad_norm": 3.531527280807495, |
| "learning_rate": 1.1836734693877552e-05, |
| "loss": 0.0577, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.2040816326530612, |
| "grad_norm": 213.36203002929688, |
| "learning_rate": 1.1938775510204084e-05, |
| "loss": 5.2828, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 44.07740020751953, |
| "learning_rate": 1.2040816326530614e-05, |
| "loss": 0.5063, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 1.3714772462844849, |
| "learning_rate": 1.2142857142857142e-05, |
| "loss": 0.0159, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2346938775510203, |
| "grad_norm": 0.10171513259410858, |
| "learning_rate": 1.2244897959183674e-05, |
| "loss": 0.0006, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.2448979591836735, |
| "grad_norm": 4.689838886260986, |
| "learning_rate": 1.2346938775510204e-05, |
| "loss": 0.0429, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.2551020408163265, |
| "grad_norm": 97.93582153320312, |
| "learning_rate": 1.2448979591836736e-05, |
| "loss": 1.1297, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.2653061224489797, |
| "grad_norm": 60.36366271972656, |
| "learning_rate": 1.2551020408163267e-05, |
| "loss": 0.9201, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.2755102040816326, |
| "grad_norm": 4.615835189819336, |
| "learning_rate": 1.2653061224489798e-05, |
| "loss": 0.0284, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 96.12593841552734, |
| "learning_rate": 1.2755102040816327e-05, |
| "loss": 1.9473, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.2959183673469388, |
| "grad_norm": 4.051046371459961, |
| "learning_rate": 1.2857142857142859e-05, |
| "loss": 0.022, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.306122448979592, |
| "grad_norm": 0.5018266439437866, |
| "learning_rate": 1.2959183673469389e-05, |
| "loss": 0.0054, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.316326530612245, |
| "grad_norm": 6.450921058654785, |
| "learning_rate": 1.3061224489795918e-05, |
| "loss": 0.1004, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.3265306122448979, |
| "grad_norm": 2.8662984371185303, |
| "learning_rate": 1.316326530612245e-05, |
| "loss": 0.0276, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.336734693877551, |
| "grad_norm": 127.14810180664062, |
| "learning_rate": 1.326530612244898e-05, |
| "loss": 2.3906, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.346938775510204, |
| "grad_norm": 9.02206802368164, |
| "learning_rate": 1.3367346938775512e-05, |
| "loss": 0.0375, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.3571428571428572, |
| "grad_norm": 150.13641357421875, |
| "learning_rate": 1.3469387755102042e-05, |
| "loss": 4.9546, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.3673469387755102, |
| "grad_norm": 12.425887107849121, |
| "learning_rate": 1.3571428571428574e-05, |
| "loss": 0.1619, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.3775510204081631, |
| "grad_norm": 0.5521597862243652, |
| "learning_rate": 1.3673469387755102e-05, |
| "loss": 0.0087, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.3877551020408163, |
| "grad_norm": 22.825164794921875, |
| "learning_rate": 1.3775510204081634e-05, |
| "loss": 0.3457, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.3979591836734695, |
| "grad_norm": 5.346078872680664, |
| "learning_rate": 1.3877551020408165e-05, |
| "loss": 0.0816, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.4081632653061225, |
| "grad_norm": 59.271095275878906, |
| "learning_rate": 1.3979591836734696e-05, |
| "loss": 1.1452, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.4183673469387754, |
| "grad_norm": 62.906166076660156, |
| "learning_rate": 1.4081632653061225e-05, |
| "loss": 0.5385, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 9.781693458557129, |
| "learning_rate": 1.4183673469387755e-05, |
| "loss": 0.1222, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4387755102040816, |
| "grad_norm": 21.102067947387695, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 0.3915, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.4489795918367347, |
| "grad_norm": 89.99552917480469, |
| "learning_rate": 1.4387755102040817e-05, |
| "loss": 3.0359, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.4591836734693877, |
| "grad_norm": 18.76301383972168, |
| "learning_rate": 1.448979591836735e-05, |
| "loss": 0.2768, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.469387755102041, |
| "grad_norm": 50.16675567626953, |
| "learning_rate": 1.4591836734693878e-05, |
| "loss": 0.6184, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.4795918367346939, |
| "grad_norm": 133.0209503173828, |
| "learning_rate": 1.469387755102041e-05, |
| "loss": 2.7128, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.489795918367347, |
| "grad_norm": 29.245777130126953, |
| "learning_rate": 1.479591836734694e-05, |
| "loss": 0.2769, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.3684585988521576, |
| "learning_rate": 1.4897959183673472e-05, |
| "loss": 0.0037, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.510204081632653, |
| "grad_norm": 109.48973083496094, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 1.0417, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.5204081632653061, |
| "grad_norm": 64.82876586914062, |
| "learning_rate": 1.510204081632653e-05, |
| "loss": 1.4451, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 250.61949157714844, |
| "learning_rate": 1.5204081632653063e-05, |
| "loss": 6.425, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.5408163265306123, |
| "grad_norm": 34.68476867675781, |
| "learning_rate": 1.530612244897959e-05, |
| "loss": 0.3295, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.5510204081632653, |
| "grad_norm": 1.7814018726348877, |
| "learning_rate": 1.5408163265306123e-05, |
| "loss": 0.0203, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.5612244897959182, |
| "grad_norm": 1.4509575366973877, |
| "learning_rate": 1.5510204081632655e-05, |
| "loss": 0.0204, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.14146992564201355, |
| "learning_rate": 1.5612244897959187e-05, |
| "loss": 0.0023, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.5816326530612246, |
| "grad_norm": 15.823746681213379, |
| "learning_rate": 1.5714285714285715e-05, |
| "loss": 0.1413, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.5918367346938775, |
| "grad_norm": 63.16089630126953, |
| "learning_rate": 1.5816326530612247e-05, |
| "loss": 1.0637, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.6020408163265305, |
| "grad_norm": 12.253658294677734, |
| "learning_rate": 1.5918367346938776e-05, |
| "loss": 0.1995, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.6122448979591837, |
| "grad_norm": 13.50799560546875, |
| "learning_rate": 1.6020408163265308e-05, |
| "loss": 0.0941, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.6224489795918369, |
| "grad_norm": 98.67269134521484, |
| "learning_rate": 1.612244897959184e-05, |
| "loss": 3.9788, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.6326530612244898, |
| "grad_norm": 41.64649963378906, |
| "learning_rate": 1.6224489795918368e-05, |
| "loss": 0.5844, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.6428571428571428, |
| "grad_norm": 141.99420166015625, |
| "learning_rate": 1.63265306122449e-05, |
| "loss": 3.5071, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.6530612244897958, |
| "grad_norm": 238.89817810058594, |
| "learning_rate": 1.642857142857143e-05, |
| "loss": 7.8894, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.663265306122449, |
| "grad_norm": 81.87030029296875, |
| "learning_rate": 1.653061224489796e-05, |
| "loss": 3.4079, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.6734693877551021, |
| "grad_norm": 178.0208740234375, |
| "learning_rate": 1.6632653061224492e-05, |
| "loss": 7.5755, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.683673469387755, |
| "grad_norm": 41.76604461669922, |
| "learning_rate": 1.673469387755102e-05, |
| "loss": 0.7972, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.693877551020408, |
| "grad_norm": 0.7283464670181274, |
| "learning_rate": 1.6836734693877553e-05, |
| "loss": 0.0106, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.7040816326530612, |
| "grad_norm": 66.69834899902344, |
| "learning_rate": 1.6938775510204085e-05, |
| "loss": 0.5323, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 1.6421160697937012, |
| "learning_rate": 1.7040816326530613e-05, |
| "loss": 0.0157, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.7244897959183674, |
| "grad_norm": 87.25151824951172, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 1.2181, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.7346938775510203, |
| "grad_norm": 1.3664076328277588, |
| "learning_rate": 1.7244897959183674e-05, |
| "loss": 0.0096, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.7448979591836735, |
| "grad_norm": 2.5815272331237793, |
| "learning_rate": 1.7346938775510206e-05, |
| "loss": 0.0152, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.7551020408163265, |
| "grad_norm": 6.008739471435547, |
| "learning_rate": 1.7448979591836738e-05, |
| "loss": 0.068, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.7653061224489797, |
| "grad_norm": 0.22308386862277985, |
| "learning_rate": 1.7551020408163266e-05, |
| "loss": 0.0014, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.7755102040816326, |
| "grad_norm": 0.37313106656074524, |
| "learning_rate": 1.7653061224489798e-05, |
| "loss": 0.0034, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.0778372585773468, |
| "learning_rate": 1.7755102040816327e-05, |
| "loss": 0.0006, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.7959183673469388, |
| "grad_norm": 32.566566467285156, |
| "learning_rate": 1.785714285714286e-05, |
| "loss": 0.4503, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.806122448979592, |
| "grad_norm": 117.00394439697266, |
| "learning_rate": 1.795918367346939e-05, |
| "loss": 4.1669, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.816326530612245, |
| "grad_norm": 42.20817565917969, |
| "learning_rate": 1.806122448979592e-05, |
| "loss": 0.6081, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.8265306122448979, |
| "grad_norm": 144.17698669433594, |
| "learning_rate": 1.816326530612245e-05, |
| "loss": 2.4056, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 45.562618255615234, |
| "learning_rate": 1.826530612244898e-05, |
| "loss": 0.5261, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.8469387755102042, |
| "grad_norm": 48.302433013916016, |
| "learning_rate": 1.836734693877551e-05, |
| "loss": 0.2616, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 14.456125259399414, |
| "learning_rate": 1.8469387755102043e-05, |
| "loss": 0.2859, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.8673469387755102, |
| "grad_norm": 137.47515869140625, |
| "learning_rate": 1.8571428571428575e-05, |
| "loss": 6.4765, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.8775510204081631, |
| "grad_norm": 1.0571497678756714, |
| "learning_rate": 1.8673469387755104e-05, |
| "loss": 0.0109, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.8877551020408163, |
| "grad_norm": 0.37840384244918823, |
| "learning_rate": 1.8775510204081636e-05, |
| "loss": 0.0034, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.8979591836734695, |
| "grad_norm": 16.713756561279297, |
| "learning_rate": 1.8877551020408164e-05, |
| "loss": 0.1816, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.9081632653061225, |
| "grad_norm": 3.689077138900757, |
| "learning_rate": 1.8979591836734696e-05, |
| "loss": 0.039, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.9183673469387754, |
| "grad_norm": 1.6388179063796997, |
| "learning_rate": 1.9081632653061225e-05, |
| "loss": 0.0239, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 118.65974426269531, |
| "learning_rate": 1.9183673469387756e-05, |
| "loss": 2.548, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.9387755102040818, |
| "grad_norm": 75.87044525146484, |
| "learning_rate": 1.928571428571429e-05, |
| "loss": 1.4144, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.9489795918367347, |
| "grad_norm": 0.3555060923099518, |
| "learning_rate": 1.9387755102040817e-05, |
| "loss": 0.0047, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.9591836734693877, |
| "grad_norm": 0.8721001744270325, |
| "learning_rate": 1.948979591836735e-05, |
| "loss": 0.0127, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.9693877551020407, |
| "grad_norm": 45.04741668701172, |
| "learning_rate": 1.9591836734693877e-05, |
| "loss": 2.928, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.9795918367346939, |
| "grad_norm": 0.20335637032985687, |
| "learning_rate": 1.969387755102041e-05, |
| "loss": 0.0012, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.989795918367347, |
| "grad_norm": 11.921625137329102, |
| "learning_rate": 1.979591836734694e-05, |
| "loss": 0.1156, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.006426109466701746, |
| "learning_rate": 1.9897959183673473e-05, |
| "loss": 0.0001, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_dim_128_cosine_accuracy@1": 0.32842509603072984, |
| "eval_dim_128_cosine_accuracy@10": 0.4007682458386684, |
| "eval_dim_128_cosine_accuracy@3": 0.33354673495518566, |
| "eval_dim_128_cosine_accuracy@5": 0.35979513444302175, |
| "eval_dim_128_cosine_map@100": 0.4185928335134121, |
| "eval_dim_128_cosine_mrr@10": 0.34048990915188065, |
| "eval_dim_128_cosine_ndcg@10": 0.3532158341818938, |
| "eval_dim_128_cosine_precision@1": 0.32842509603072984, |
| "eval_dim_128_cosine_precision@10": 0.29206145966709346, |
| "eval_dim_128_cosine_precision@3": 0.3282116944088775, |
| "eval_dim_128_cosine_precision@5": 0.3201024327784891, |
| "eval_dim_128_cosine_recall@1": 0.03826623805026492, |
| "eval_dim_128_cosine_recall@10": 0.2557427522394185, |
| "eval_dim_128_cosine_recall@3": 0.11237350584970618, |
| "eval_dim_128_cosine_recall@5": 0.1705675982839957, |
| "eval_dim_256_cosine_accuracy@1": 0.3706786171574904, |
| "eval_dim_256_cosine_accuracy@10": 0.44430217669654287, |
| "eval_dim_256_cosine_accuracy@3": 0.37708066581306016, |
| "eval_dim_256_cosine_accuracy@5": 0.4026888604353393, |
| "eval_dim_256_cosine_map@100": 0.4581292876731238, |
| "eval_dim_256_cosine_mrr@10": 0.3829479401662495, |
| "eval_dim_256_cosine_ndcg@10": 0.3954192054513208, |
| "eval_dim_256_cosine_precision@1": 0.3706786171574904, |
| "eval_dim_256_cosine_precision@10": 0.33476312419974397, |
| "eval_dim_256_cosine_precision@3": 0.3704652155356381, |
| "eval_dim_256_cosine_precision@5": 0.36235595390524966, |
| "eval_dim_256_cosine_recall@1": 0.04010394552921631, |
| "eval_dim_256_cosine_recall@10": 0.2702458225766465, |
| "eval_dim_256_cosine_recall@3": 0.11731644582817367, |
| "eval_dim_256_cosine_recall@5": 0.17765428439223818, |
| "eval_dim_512_cosine_accuracy@1": 0.3886043533930858, |
| "eval_dim_512_cosine_accuracy@10": 0.4622279129321383, |
| "eval_dim_512_cosine_accuracy@3": 0.39500640204865556, |
| "eval_dim_512_cosine_accuracy@5": 0.4225352112676056, |
| "eval_dim_512_cosine_map@100": 0.47243004222010543, |
| "eval_dim_512_cosine_mrr@10": 0.40112569355527056, |
| "eval_dim_512_cosine_ndcg@10": 0.41406124757842494, |
| "eval_dim_512_cosine_precision@1": 0.3886043533930858, |
| "eval_dim_512_cosine_precision@10": 0.34935979513444304, |
| "eval_dim_512_cosine_precision@3": 0.38839095177123345, |
| "eval_dim_512_cosine_precision@5": 0.3797695262483995, |
| "eval_dim_512_cosine_recall@1": 0.04209995734458549, |
| "eval_dim_512_cosine_recall@10": 0.28234778004041294, |
| "eval_dim_512_cosine_recall@3": 0.12343316714240367, |
| "eval_dim_512_cosine_recall@5": 0.1874852461853036, |
| "eval_dim_64_cosine_accuracy@1": 0.2887323943661972, |
| "eval_dim_64_cosine_accuracy@10": 0.35147247119078107, |
| "eval_dim_64_cosine_accuracy@3": 0.29769526248399486, |
| "eval_dim_64_cosine_accuracy@5": 0.3213828425096031, |
| "eval_dim_64_cosine_map@100": 0.36977389907621383, |
| "eval_dim_64_cosine_mrr@10": 0.3001796130317253, |
| "eval_dim_64_cosine_ndcg@10": 0.3121069409877711, |
| "eval_dim_64_cosine_precision@1": 0.2887323943661972, |
| "eval_dim_64_cosine_precision@10": 0.25448143405889884, |
| "eval_dim_64_cosine_precision@3": 0.28958600085360653, |
| "eval_dim_64_cosine_precision@5": 0.2827144686299616, |
| "eval_dim_64_cosine_recall@1": 0.03545580301027414, |
| "eval_dim_64_cosine_recall@10": 0.23272201386526145, |
| "eval_dim_64_cosine_recall@3": 0.10416703458132823, |
| "eval_dim_64_cosine_recall@5": 0.15778648755468183, |
| "eval_dim_768_cosine_accuracy@1": 0.39500640204865556, |
| "eval_dim_768_cosine_accuracy@10": 0.4667093469910371, |
| "eval_dim_768_cosine_accuracy@3": 0.4039692701664533, |
| "eval_dim_768_cosine_accuracy@5": 0.4327784891165173, |
| "eval_dim_768_cosine_map@100": 0.48082492057308085, |
| "eval_dim_768_cosine_mrr@10": 0.40793625388695776, |
| "eval_dim_768_cosine_ndcg@10": 0.4221720847747187, |
| "eval_dim_768_cosine_precision@1": 0.39500640204865556, |
| "eval_dim_768_cosine_precision@10": 0.3571702944942382, |
| "eval_dim_768_cosine_precision@3": 0.39564660691421255, |
| "eval_dim_768_cosine_precision@5": 0.3882202304737516, |
| "eval_dim_768_cosine_recall@1": 0.04243158779272862, |
| "eval_dim_768_cosine_recall@10": 0.289899399385322, |
| "eval_dim_768_cosine_recall@3": 0.1245655383778341, |
| "eval_dim_768_cosine_recall@5": 0.1902548634299299, |
| "eval_runtime": 183.3777, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.3121069409877711, |
| "eval_steps_per_second": 0.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.010204081632653, |
| "grad_norm": 19.43300437927246, |
| "learning_rate": 2e-05, |
| "loss": 0.768, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.020408163265306, |
| "grad_norm": 0.857424259185791, |
| "learning_rate": 1.9999984141121447e-05, |
| "loss": 0.0073, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.0306122448979593, |
| "grad_norm": 126.48216247558594, |
| "learning_rate": 1.9999936564536085e-05, |
| "loss": 1.6622, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.030239321291446686, |
| "learning_rate": 1.9999857270394818e-05, |
| "loss": 0.0003, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.0510204081632653, |
| "grad_norm": 3.846285343170166, |
| "learning_rate": 1.9999746258949146e-05, |
| "loss": 0.0398, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.061224489795918, |
| "grad_norm": 0.007493811659514904, |
| "learning_rate": 1.9999603530551178e-05, |
| "loss": 0.0001, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.0714285714285716, |
| "grad_norm": 33.33332061767578, |
| "learning_rate": 1.999942908565361e-05, |
| "loss": 0.3767, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.0816326530612246, |
| "grad_norm": 25.518348693847656, |
| "learning_rate": 1.999922292480975e-05, |
| "loss": 0.4468, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.0918367346938775, |
| "grad_norm": 10.444570541381836, |
| "learning_rate": 1.9998985048673486e-05, |
| "loss": 0.1021, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.1020408163265305, |
| "grad_norm": 106.1369857788086, |
| "learning_rate": 1.9998715457999313e-05, |
| "loss": 1.5802, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.1122448979591835, |
| "grad_norm": 18.592628479003906, |
| "learning_rate": 1.999841415364231e-05, |
| "loss": 0.1798, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.122448979591837, |
| "grad_norm": 0.11251077055931091, |
| "learning_rate": 1.999808113655815e-05, |
| "loss": 0.0015, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.13265306122449, |
| "grad_norm": 0.73614501953125, |
| "learning_rate": 1.999771640780308e-05, |
| "loss": 0.0055, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 54.640098571777344, |
| "learning_rate": 1.999731996853395e-05, |
| "loss": 0.6201, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.1530612244897958, |
| "grad_norm": 79.83525848388672, |
| "learning_rate": 1.9996891820008165e-05, |
| "loss": 1.263, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.163265306122449, |
| "grad_norm": 1.2913179397583008, |
| "learning_rate": 1.9996431963583724e-05, |
| "loss": 0.0194, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.173469387755102, |
| "grad_norm": 0.05232042446732521, |
| "learning_rate": 1.9995940400719184e-05, |
| "loss": 0.0005, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.183673469387755, |
| "grad_norm": 247.92056274414062, |
| "learning_rate": 1.9995417132973674e-05, |
| "loss": 10.7772, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.193877551020408, |
| "grad_norm": 79.94287872314453, |
| "learning_rate": 1.999486216200688e-05, |
| "loss": 1.4789, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.204081632653061, |
| "grad_norm": 31.259197235107422, |
| "learning_rate": 1.999427548957905e-05, |
| "loss": 0.3912, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.2142857142857144, |
| "grad_norm": 31.51141357421875, |
| "learning_rate": 1.9993657117550972e-05, |
| "loss": 0.2786, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.2244897959183674, |
| "grad_norm": 39.90089797973633, |
| "learning_rate": 1.9993007047883988e-05, |
| "loss": 0.6376, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.2346938775510203, |
| "grad_norm": 0.3418372869491577, |
| "learning_rate": 1.999232528263997e-05, |
| "loss": 0.0059, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.2448979591836733, |
| "grad_norm": 74.24694061279297, |
| "learning_rate": 1.9991611823981322e-05, |
| "loss": 1.3822, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.2551020408163267, |
| "grad_norm": 101.86847686767578, |
| "learning_rate": 1.9990866674170984e-05, |
| "loss": 1.2364, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.2653061224489797, |
| "grad_norm": 160.41319274902344, |
| "learning_rate": 1.99900898355724e-05, |
| "loss": 2.8296, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.2755102040816326, |
| "grad_norm": 42.205142974853516, |
| "learning_rate": 1.9989281310649516e-05, |
| "loss": 0.47, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 78.7785415649414, |
| "learning_rate": 1.9988441101966807e-05, |
| "loss": 1.2266, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.295918367346939, |
| "grad_norm": 1.0640811920166016, |
| "learning_rate": 1.9987569212189224e-05, |
| "loss": 0.0115, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.306122448979592, |
| "grad_norm": 3.0871458053588867, |
| "learning_rate": 1.9986665644082204e-05, |
| "loss": 0.017, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.316326530612245, |
| "grad_norm": 2.008272647857666, |
| "learning_rate": 1.9985730400511658e-05, |
| "loss": 0.0165, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.326530612244898, |
| "grad_norm": 7.105755805969238, |
| "learning_rate": 1.998476348444397e-05, |
| "loss": 0.0807, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.336734693877551, |
| "grad_norm": 42.1352424621582, |
| "learning_rate": 1.998376489894599e-05, |
| "loss": 0.3864, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.3469387755102042, |
| "grad_norm": 11.61186408996582, |
| "learning_rate": 1.9982734647184997e-05, |
| "loss": 0.2179, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 380.5256042480469, |
| "learning_rate": 1.998167273242872e-05, |
| "loss": 9.596, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.36734693877551, |
| "grad_norm": 219.15957641601562, |
| "learning_rate": 1.9980579158045322e-05, |
| "loss": 3.8921, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.377551020408163, |
| "grad_norm": 7.717984676361084, |
| "learning_rate": 1.9979453927503366e-05, |
| "loss": 0.0677, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.387755102040816, |
| "grad_norm": 1.607508897781372, |
| "learning_rate": 1.9978297044371834e-05, |
| "loss": 0.0184, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.3979591836734695, |
| "grad_norm": 11.009926795959473, |
| "learning_rate": 1.9977108512320103e-05, |
| "loss": 0.1947, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.4081632653061225, |
| "grad_norm": 43.153709411621094, |
| "learning_rate": 1.9975888335117927e-05, |
| "loss": 0.5775, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.4183673469387754, |
| "grad_norm": 27.642406463623047, |
| "learning_rate": 1.9974636516635436e-05, |
| "loss": 0.1769, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 1.5701595544815063, |
| "learning_rate": 1.9973353060843118e-05, |
| "loss": 0.0112, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.438775510204082, |
| "grad_norm": 312.0637512207031, |
| "learning_rate": 1.9972037971811802e-05, |
| "loss": 9.3438, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 8.197367668151855, |
| "learning_rate": 1.9970691253712663e-05, |
| "loss": 0.092, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.4591836734693877, |
| "grad_norm": 38.18027114868164, |
| "learning_rate": 1.9969312910817183e-05, |
| "loss": 0.8527, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.4693877551020407, |
| "grad_norm": 16.030864715576172, |
| "learning_rate": 1.9967902947497158e-05, |
| "loss": 0.1134, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.479591836734694, |
| "grad_norm": 0.02204311266541481, |
| "learning_rate": 1.9966461368224676e-05, |
| "loss": 0.0002, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.489795918367347, |
| "grad_norm": 0.6758233904838562, |
| "learning_rate": 1.9964988177572106e-05, |
| "loss": 0.0092, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.1679491549730301, |
| "learning_rate": 1.996348338021207e-05, |
| "loss": 0.002, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.510204081632653, |
| "grad_norm": 124.84944152832031, |
| "learning_rate": 1.9961946980917457e-05, |
| "loss": 9.4742, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.520408163265306, |
| "grad_norm": 304.3159484863281, |
| "learning_rate": 1.9960378984561377e-05, |
| "loss": 8.5164, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.5306122448979593, |
| "grad_norm": 137.6317138671875, |
| "learning_rate": 1.9958779396117162e-05, |
| "loss": 2.4357, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.5408163265306123, |
| "grad_norm": 88.31053161621094, |
| "learning_rate": 1.9957148220658348e-05, |
| "loss": 1.1891, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.5510204081632653, |
| "grad_norm": 270.725341796875, |
| "learning_rate": 1.9955485463358655e-05, |
| "loss": 4.1178, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.561224489795918, |
| "grad_norm": 0.07664936035871506, |
| "learning_rate": 1.9953791129491985e-05, |
| "loss": 0.001, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 17.77437400817871, |
| "learning_rate": 1.9952065224432376e-05, |
| "loss": 0.1828, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.5816326530612246, |
| "grad_norm": 208.72927856445312, |
| "learning_rate": 1.9950307753654016e-05, |
| "loss": 4.9505, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.5918367346938775, |
| "grad_norm": 77.77067565917969, |
| "learning_rate": 1.9948518722731208e-05, |
| "loss": 0.8772, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.6020408163265305, |
| "grad_norm": 4.941845893859863, |
| "learning_rate": 1.9946698137338357e-05, |
| "loss": 0.054, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.612244897959184, |
| "grad_norm": 79.66358947753906, |
| "learning_rate": 1.994484600324995e-05, |
| "loss": 1.2223, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.622448979591837, |
| "grad_norm": 49.39836120605469, |
| "learning_rate": 1.994296232634054e-05, |
| "loss": 0.5202, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.63265306122449, |
| "grad_norm": 0.2510567009449005, |
| "learning_rate": 1.994104711258473e-05, |
| "loss": 0.002, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.642857142857143, |
| "grad_norm": 0.1556750386953354, |
| "learning_rate": 1.9939100368057144e-05, |
| "loss": 0.0017, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.6530612244897958, |
| "grad_norm": 0.1744566559791565, |
| "learning_rate": 1.9937122098932428e-05, |
| "loss": 0.0026, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.663265306122449, |
| "grad_norm": 64.05052947998047, |
| "learning_rate": 1.99351123114852e-05, |
| "loss": 0.4856, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.673469387755102, |
| "grad_norm": 0.5290594100952148, |
| "learning_rate": 1.993307101209006e-05, |
| "loss": 0.0067, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.683673469387755, |
| "grad_norm": 105.92308044433594, |
| "learning_rate": 1.993099820722155e-05, |
| "loss": 1.2193, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.693877551020408, |
| "grad_norm": 77.6264877319336, |
| "learning_rate": 1.992889390345414e-05, |
| "loss": 2.4912, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.704081632653061, |
| "grad_norm": 0.30245572328567505, |
| "learning_rate": 1.9926758107462208e-05, |
| "loss": 0.0031, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 41.0025749206543, |
| "learning_rate": 1.9924590826020027e-05, |
| "loss": 0.5973, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.7244897959183674, |
| "grad_norm": 0.0627225786447525, |
| "learning_rate": 1.9922392066001724e-05, |
| "loss": 0.0007, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.7346938775510203, |
| "grad_norm": 47.067935943603516, |
| "learning_rate": 1.992016183438127e-05, |
| "loss": 1.3781, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.7448979591836737, |
| "grad_norm": 0.5052754878997803, |
| "learning_rate": 1.991790013823246e-05, |
| "loss": 0.0083, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.7551020408163263, |
| "grad_norm": 0.01090068370103836, |
| "learning_rate": 1.9915606984728896e-05, |
| "loss": 0.0001, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.7653061224489797, |
| "grad_norm": 31.127262115478516, |
| "learning_rate": 1.9913282381143934e-05, |
| "loss": 0.2631, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.7755102040816326, |
| "grad_norm": 7.258132457733154, |
| "learning_rate": 1.99109263348507e-05, |
| "loss": 0.0525, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.05350486561655998, |
| "learning_rate": 1.9908538853322046e-05, |
| "loss": 0.0008, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.795918367346939, |
| "grad_norm": 7.694390773773193, |
| "learning_rate": 1.9906119944130527e-05, |
| "loss": 0.0738, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.806122448979592, |
| "grad_norm": 0.25629743933677673, |
| "learning_rate": 1.9903669614948382e-05, |
| "loss": 0.0019, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.816326530612245, |
| "grad_norm": 0.04774434119462967, |
| "learning_rate": 1.9901187873547504e-05, |
| "loss": 0.0008, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.826530612244898, |
| "grad_norm": 29.360733032226562, |
| "learning_rate": 1.9898674727799418e-05, |
| "loss": 0.4261, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.836734693877551, |
| "grad_norm": 0.4614088535308838, |
| "learning_rate": 1.9896130185675263e-05, |
| "loss": 0.0072, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.8469387755102042, |
| "grad_norm": 59.63426971435547, |
| "learning_rate": 1.9893554255245748e-05, |
| "loss": 1.9606, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 2.8956282138824463, |
| "learning_rate": 1.9890946944681157e-05, |
| "loss": 0.0348, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.86734693877551, |
| "grad_norm": 196.1266326904297, |
| "learning_rate": 1.9888308262251286e-05, |
| "loss": 0.1742, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.877551020408163, |
| "grad_norm": 0.15560828149318695, |
| "learning_rate": 1.988563821632545e-05, |
| "loss": 0.0018, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.887755102040816, |
| "grad_norm": 18.213293075561523, |
| "learning_rate": 1.9882936815372432e-05, |
| "loss": 0.3129, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.8979591836734695, |
| "grad_norm": 93.50361633300781, |
| "learning_rate": 1.9880204067960473e-05, |
| "loss": 0.3552, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.9081632653061225, |
| "grad_norm": 88.30142974853516, |
| "learning_rate": 1.9877439982757228e-05, |
| "loss": 1.901, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.9183673469387754, |
| "grad_norm": 14.138421058654785, |
| "learning_rate": 1.9874644568529763e-05, |
| "loss": 0.1566, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.928571428571429, |
| "grad_norm": 1.8858366012573242, |
| "learning_rate": 1.9871817834144506e-05, |
| "loss": 0.0247, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.938775510204082, |
| "grad_norm": 0.07699646055698395, |
| "learning_rate": 1.9868959788567213e-05, |
| "loss": 0.0009, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.9489795918367347, |
| "grad_norm": 0.006560923531651497, |
| "learning_rate": 1.9866070440862977e-05, |
| "loss": 0.0001, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.9591836734693877, |
| "grad_norm": 0.0304582379758358, |
| "learning_rate": 1.9863149800196152e-05, |
| "loss": 0.0004, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.9693877551020407, |
| "grad_norm": 2.932767868041992, |
| "learning_rate": 1.9860197875830355e-05, |
| "loss": 0.0262, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.979591836734694, |
| "grad_norm": 5.625049591064453, |
| "learning_rate": 1.9857214677128436e-05, |
| "loss": 0.0334, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.989795918367347, |
| "grad_norm": 1.7222120761871338, |
| "learning_rate": 1.9854200213552426e-05, |
| "loss": 0.0146, |
| "step": 293 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.34878501296043396, |
| "learning_rate": 1.985115449466353e-05, |
| "loss": 0.0044, |
| "step": 294 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_dim_128_cosine_accuracy@1": 0.353393085787452, |
| "eval_dim_128_cosine_accuracy@10": 0.4186939820742638, |
| "eval_dim_128_cosine_accuracy@3": 0.3617157490396927, |
| "eval_dim_128_cosine_accuracy@5": 0.3879641485275288, |
| "eval_dim_128_cosine_map@100": 0.44283999281955866, |
| "eval_dim_128_cosine_mrr@10": 0.3652353515029568, |
| "eval_dim_128_cosine_ndcg@10": 0.3779175151946445, |
| "eval_dim_128_cosine_precision@1": 0.353393085787452, |
| "eval_dim_128_cosine_precision@10": 0.3151728553137004, |
| "eval_dim_128_cosine_precision@3": 0.3542466922748612, |
| "eval_dim_128_cosine_precision@5": 0.34609475032010245, |
| "eval_dim_128_cosine_recall@1": 0.03978744960200595, |
| "eval_dim_128_cosine_recall@10": 0.26555745666889424, |
| "eval_dim_128_cosine_recall@3": 0.117127816905993, |
| "eval_dim_128_cosine_recall@5": 0.17767259683097839, |
| "eval_dim_256_cosine_accuracy@1": 0.3905249679897567, |
| "eval_dim_256_cosine_accuracy@10": 0.45966709346991036, |
| "eval_dim_256_cosine_accuracy@3": 0.3994878361075544, |
| "eval_dim_256_cosine_accuracy@5": 0.4263764404609475, |
| "eval_dim_256_cosine_map@100": 0.4797247358039393, |
| "eval_dim_256_cosine_mrr@10": 0.4030391947645464, |
| "eval_dim_256_cosine_ndcg@10": 0.41529982696069817, |
| "eval_dim_256_cosine_precision@1": 0.3905249679897567, |
| "eval_dim_256_cosine_precision@10": 0.3473111395646607, |
| "eval_dim_256_cosine_precision@3": 0.39137857447716595, |
| "eval_dim_256_cosine_precision@5": 0.3824583866837389, |
| "eval_dim_256_cosine_recall@1": 0.04379643457341558, |
| "eval_dim_256_cosine_recall@10": 0.2849589544480218, |
| "eval_dim_256_cosine_recall@3": 0.12838062817408333, |
| "eval_dim_256_cosine_recall@5": 0.19365867195858333, |
| "eval_dim_512_cosine_accuracy@1": 0.41165172855313703, |
| "eval_dim_512_cosine_accuracy@10": 0.4820742637644046, |
| "eval_dim_512_cosine_accuracy@3": 0.4206145966709347, |
| "eval_dim_512_cosine_accuracy@5": 0.4513444302176697, |
| "eval_dim_512_cosine_map@100": 0.5006190134140577, |
| "eval_dim_512_cosine_mrr@10": 0.42457192650854575, |
| "eval_dim_512_cosine_ndcg@10": 0.43828991743977486, |
| "eval_dim_512_cosine_precision@1": 0.41165172855313703, |
| "eval_dim_512_cosine_precision@10": 0.37048655569782335, |
| "eval_dim_512_cosine_precision@3": 0.41229193341869397, |
| "eval_dim_512_cosine_precision@5": 0.404225352112676, |
| "eval_dim_512_cosine_recall@1": 0.04453915204812793, |
| "eval_dim_512_cosine_recall@10": 0.2976368767832588, |
| "eval_dim_512_cosine_recall@3": 0.13075976756926563, |
| "eval_dim_512_cosine_recall@5": 0.19871584447147544, |
| "eval_dim_64_cosine_accuracy@1": 0.31241997439180536, |
| "eval_dim_64_cosine_accuracy@10": 0.3674775928297055, |
| "eval_dim_64_cosine_accuracy@3": 0.3181818181818182, |
| "eval_dim_64_cosine_accuracy@5": 0.33674775928297057, |
| "eval_dim_64_cosine_map@100": 0.3932871188436923, |
| "eval_dim_64_cosine_mrr@10": 0.32184292624433436, |
| "eval_dim_64_cosine_ndcg@10": 0.33149908528792255, |
| "eval_dim_64_cosine_precision@1": 0.31241997439180536, |
| "eval_dim_64_cosine_precision@10": 0.27151088348271446, |
| "eval_dim_64_cosine_precision@3": 0.31241997439180536, |
| "eval_dim_64_cosine_precision@5": 0.3029449423815621, |
| "eval_dim_64_cosine_recall@1": 0.03691005165108737, |
| "eval_dim_64_cosine_recall@10": 0.23779183482125327, |
| "eval_dim_64_cosine_recall@3": 0.1083213328481372, |
| "eval_dim_64_cosine_recall@5": 0.16275808863008476, |
| "eval_dim_768_cosine_accuracy@1": 0.4199743918053777, |
| "eval_dim_768_cosine_accuracy@10": 0.49551856594110116, |
| "eval_dim_768_cosine_accuracy@3": 0.42893725992317544, |
| "eval_dim_768_cosine_accuracy@5": 0.4622279129321383, |
| "eval_dim_768_cosine_map@100": 0.5084691302256611, |
| "eval_dim_768_cosine_mrr@10": 0.43367096518504944, |
| "eval_dim_768_cosine_ndcg@10": 0.44799498237634766, |
| "eval_dim_768_cosine_precision@1": 0.4199743918053777, |
| "eval_dim_768_cosine_precision@10": 0.3802176696542894, |
| "eval_dim_768_cosine_precision@3": 0.42040119504908235, |
| "eval_dim_768_cosine_precision@5": 0.41254801536491675, |
| "eval_dim_768_cosine_recall@1": 0.04510157469618885, |
| "eval_dim_768_cosine_recall@10": 0.30249531360222554, |
| "eval_dim_768_cosine_recall@3": 0.13229329889404273, |
| "eval_dim_768_cosine_recall@5": 0.2009019928625879, |
| "eval_runtime": 184.2103, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.33149908528792255, |
| "eval_steps_per_second": 0.0, |
| "step": 294 |
| }, |
| { |
| "epoch": 3.010204081632653, |
| "grad_norm": 22.25138282775879, |
| "learning_rate": 1.9848077530122083e-05, |
| "loss": 0.2686, |
| "step": 295 |
| }, |
| { |
| "epoch": 3.020408163265306, |
| "grad_norm": 0.13433043658733368, |
| "learning_rate": 1.9844969329687526e-05, |
| "loss": 0.0008, |
| "step": 296 |
| }, |
| { |
| "epoch": 3.0306122448979593, |
| "grad_norm": 0.8659433126449585, |
| "learning_rate": 1.9841829903218377e-05, |
| "loss": 0.0106, |
| "step": 297 |
| }, |
| { |
| "epoch": 3.0408163265306123, |
| "grad_norm": 5.649078845977783, |
| "learning_rate": 1.983865926067219e-05, |
| "loss": 0.0551, |
| "step": 298 |
| }, |
| { |
| "epoch": 3.0510204081632653, |
| "grad_norm": 179.68861389160156, |
| "learning_rate": 1.983545741210553e-05, |
| "loss": 1.2816, |
| "step": 299 |
| }, |
| { |
| "epoch": 3.061224489795918, |
| "grad_norm": 0.24586763978004456, |
| "learning_rate": 1.9832224367673945e-05, |
| "loss": 0.002, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.0714285714285716, |
| "grad_norm": 2.8069469928741455, |
| "learning_rate": 1.9828960137631927e-05, |
| "loss": 0.0406, |
| "step": 301 |
| }, |
| { |
| "epoch": 3.0816326530612246, |
| "grad_norm": 0.5202066898345947, |
| "learning_rate": 1.9825664732332886e-05, |
| "loss": 0.0081, |
| "step": 302 |
| }, |
| { |
| "epoch": 3.0918367346938775, |
| "grad_norm": 0.8059203028678894, |
| "learning_rate": 1.98223381622291e-05, |
| "loss": 0.0064, |
| "step": 303 |
| }, |
| { |
| "epoch": 3.1020408163265305, |
| "grad_norm": 0.45694583654403687, |
| "learning_rate": 1.9818980437871707e-05, |
| "loss": 0.0061, |
| "step": 304 |
| }, |
| { |
| "epoch": 3.1122448979591835, |
| "grad_norm": 29.932289123535156, |
| "learning_rate": 1.9815591569910654e-05, |
| "loss": 0.4775, |
| "step": 305 |
| }, |
| { |
| "epoch": 3.122448979591837, |
| "grad_norm": 37.35374069213867, |
| "learning_rate": 1.9812171569094675e-05, |
| "loss": 0.3185, |
| "step": 306 |
| }, |
| { |
| "epoch": 3.13265306122449, |
| "grad_norm": 0.9324603080749512, |
| "learning_rate": 1.980872044627124e-05, |
| "loss": 0.0105, |
| "step": 307 |
| }, |
| { |
| "epoch": 3.142857142857143, |
| "grad_norm": 0.01630260981619358, |
| "learning_rate": 1.980523821238654e-05, |
| "loss": 0.0001, |
| "step": 308 |
| }, |
| { |
| "epoch": 3.1530612244897958, |
| "grad_norm": 368.7991638183594, |
| "learning_rate": 1.9801724878485438e-05, |
| "loss": 10.5217, |
| "step": 309 |
| }, |
| { |
| "epoch": 3.163265306122449, |
| "grad_norm": 0.37166470289230347, |
| "learning_rate": 1.9798180455711445e-05, |
| "loss": 0.0041, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.173469387755102, |
| "grad_norm": 12.572883605957031, |
| "learning_rate": 1.9794604955306668e-05, |
| "loss": 0.1077, |
| "step": 311 |
| }, |
| { |
| "epoch": 3.183673469387755, |
| "grad_norm": 6.274280071258545, |
| "learning_rate": 1.97909983886118e-05, |
| "loss": 0.0984, |
| "step": 312 |
| }, |
| { |
| "epoch": 3.193877551020408, |
| "grad_norm": 3.7283997535705566, |
| "learning_rate": 1.9787360767066054e-05, |
| "loss": 0.0279, |
| "step": 313 |
| }, |
| { |
| "epoch": 3.204081632653061, |
| "grad_norm": 0.09550534188747406, |
| "learning_rate": 1.9783692102207156e-05, |
| "loss": 0.0009, |
| "step": 314 |
| }, |
| { |
| "epoch": 3.2142857142857144, |
| "grad_norm": 9.320639610290527, |
| "learning_rate": 1.9779992405671284e-05, |
| "loss": 0.1379, |
| "step": 315 |
| }, |
| { |
| "epoch": 3.2244897959183674, |
| "grad_norm": 0.0006459120777435601, |
| "learning_rate": 1.977626168919305e-05, |
| "loss": 0.0, |
| "step": 316 |
| }, |
| { |
| "epoch": 3.2346938775510203, |
| "grad_norm": 0.02275056019425392, |
| "learning_rate": 1.977249996460544e-05, |
| "loss": 0.0003, |
| "step": 317 |
| }, |
| { |
| "epoch": 3.2448979591836733, |
| "grad_norm": 12.579317092895508, |
| "learning_rate": 1.976870724383981e-05, |
| "loss": 0.0852, |
| "step": 318 |
| }, |
| { |
| "epoch": 3.2551020408163267, |
| "grad_norm": 0.0938275083899498, |
| "learning_rate": 1.9764883538925822e-05, |
| "loss": 0.0015, |
| "step": 319 |
| }, |
| { |
| "epoch": 3.2653061224489797, |
| "grad_norm": 0.10724499076604843, |
| "learning_rate": 1.9761028861991406e-05, |
| "loss": 0.0011, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.2755102040816326, |
| "grad_norm": 0.0515306331217289, |
| "learning_rate": 1.975714322526273e-05, |
| "loss": 0.0006, |
| "step": 321 |
| }, |
| { |
| "epoch": 3.2857142857142856, |
| "grad_norm": 61.047203063964844, |
| "learning_rate": 1.9753226641064164e-05, |
| "loss": 1.2658, |
| "step": 322 |
| }, |
| { |
| "epoch": 3.295918367346939, |
| "grad_norm": 8.448295593261719, |
| "learning_rate": 1.9749279121818235e-05, |
| "loss": 0.0457, |
| "step": 323 |
| }, |
| { |
| "epoch": 3.306122448979592, |
| "grad_norm": 1.0811445713043213, |
| "learning_rate": 1.974530068004559e-05, |
| "loss": 0.0111, |
| "step": 324 |
| }, |
| { |
| "epoch": 3.316326530612245, |
| "grad_norm": 43.832454681396484, |
| "learning_rate": 1.9741291328364955e-05, |
| "loss": 1.0571, |
| "step": 325 |
| }, |
| { |
| "epoch": 3.326530612244898, |
| "grad_norm": 0.005959355738013983, |
| "learning_rate": 1.973725107949309e-05, |
| "loss": 0.0001, |
| "step": 326 |
| }, |
| { |
| "epoch": 3.336734693877551, |
| "grad_norm": 0.15442679822444916, |
| "learning_rate": 1.973317994624476e-05, |
| "loss": 0.0014, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.3469387755102042, |
| "grad_norm": 44.03955078125, |
| "learning_rate": 1.9729077941532687e-05, |
| "loss": 0.3352, |
| "step": 328 |
| }, |
| { |
| "epoch": 3.357142857142857, |
| "grad_norm": 66.37464141845703, |
| "learning_rate": 1.9724945078367513e-05, |
| "loss": 1.3782, |
| "step": 329 |
| }, |
| { |
| "epoch": 3.36734693877551, |
| "grad_norm": 3.3627383708953857, |
| "learning_rate": 1.9720781369857747e-05, |
| "loss": 0.008, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.377551020408163, |
| "grad_norm": 0.06466995179653168, |
| "learning_rate": 1.9716586829209743e-05, |
| "loss": 0.0007, |
| "step": 331 |
| }, |
| { |
| "epoch": 3.387755102040816, |
| "grad_norm": 0.16937388479709625, |
| "learning_rate": 1.971236146972764e-05, |
| "loss": 0.0018, |
| "step": 332 |
| }, |
| { |
| "epoch": 3.3979591836734695, |
| "grad_norm": 26.774580001831055, |
| "learning_rate": 1.9708105304813333e-05, |
| "loss": 0.1579, |
| "step": 333 |
| }, |
| { |
| "epoch": 3.4081632653061225, |
| "grad_norm": 36.40068054199219, |
| "learning_rate": 1.970381834796642e-05, |
| "loss": 0.3014, |
| "step": 334 |
| }, |
| { |
| "epoch": 3.4183673469387754, |
| "grad_norm": 12.088098526000977, |
| "learning_rate": 1.969950061278417e-05, |
| "loss": 0.0626, |
| "step": 335 |
| }, |
| { |
| "epoch": 3.4285714285714284, |
| "grad_norm": 0.4224468469619751, |
| "learning_rate": 1.969515211296147e-05, |
| "loss": 0.0074, |
| "step": 336 |
| }, |
| { |
| "epoch": 3.438775510204082, |
| "grad_norm": 0.3234362006187439, |
| "learning_rate": 1.969077286229078e-05, |
| "loss": 0.002, |
| "step": 337 |
| }, |
| { |
| "epoch": 3.4489795918367347, |
| "grad_norm": 0.7104203701019287, |
| "learning_rate": 1.968636287466211e-05, |
| "loss": 0.0047, |
| "step": 338 |
| }, |
| { |
| "epoch": 3.4591836734693877, |
| "grad_norm": 4.555932521820068, |
| "learning_rate": 1.9681922164062945e-05, |
| "loss": 0.0601, |
| "step": 339 |
| }, |
| { |
| "epoch": 3.4693877551020407, |
| "grad_norm": 1.1395331621170044, |
| "learning_rate": 1.967745074457823e-05, |
| "loss": 0.0119, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.479591836734694, |
| "grad_norm": 0.044391512870788574, |
| "learning_rate": 1.9672948630390296e-05, |
| "loss": 0.0003, |
| "step": 341 |
| }, |
| { |
| "epoch": 3.489795918367347, |
| "grad_norm": 1.9847981929779053, |
| "learning_rate": 1.9668415835778845e-05, |
| "loss": 0.0319, |
| "step": 342 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 1.8022907972335815, |
| "learning_rate": 1.9663852375120882e-05, |
| "loss": 0.024, |
| "step": 343 |
| }, |
| { |
| "epoch": 3.510204081632653, |
| "grad_norm": 0.36603665351867676, |
| "learning_rate": 1.9659258262890683e-05, |
| "loss": 0.0034, |
| "step": 344 |
| }, |
| { |
| "epoch": 3.520408163265306, |
| "grad_norm": 20.221256256103516, |
| "learning_rate": 1.9654633513659743e-05, |
| "loss": 0.1909, |
| "step": 345 |
| }, |
| { |
| "epoch": 3.5306122448979593, |
| "grad_norm": 4.926142692565918, |
| "learning_rate": 1.9649978142096726e-05, |
| "loss": 0.08, |
| "step": 346 |
| }, |
| { |
| "epoch": 3.5408163265306123, |
| "grad_norm": 0.02573891542851925, |
| "learning_rate": 1.9645292162967426e-05, |
| "loss": 0.0003, |
| "step": 347 |
| }, |
| { |
| "epoch": 3.5510204081632653, |
| "grad_norm": 3.7188498973846436, |
| "learning_rate": 1.964057559113472e-05, |
| "loss": 0.0396, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.561224489795918, |
| "grad_norm": 1.5656611919403076, |
| "learning_rate": 1.9635828441558515e-05, |
| "loss": 0.0127, |
| "step": 349 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 1.1211833953857422, |
| "learning_rate": 1.9631050729295705e-05, |
| "loss": 0.0146, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.5816326530612246, |
| "grad_norm": 14.682442665100098, |
| "learning_rate": 1.962624246950012e-05, |
| "loss": 0.0916, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.5918367346938775, |
| "grad_norm": 9.328814506530762, |
| "learning_rate": 1.9621403677422487e-05, |
| "loss": 0.075, |
| "step": 352 |
| }, |
| { |
| "epoch": 3.6020408163265305, |
| "grad_norm": 0.16812823712825775, |
| "learning_rate": 1.9616534368410364e-05, |
| "loss": 0.0012, |
| "step": 353 |
| }, |
| { |
| "epoch": 3.612244897959184, |
| "grad_norm": 21.99177360534668, |
| "learning_rate": 1.961163455790811e-05, |
| "loss": 0.4742, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.622448979591837, |
| "grad_norm": 0.015136899426579475, |
| "learning_rate": 1.960670426145682e-05, |
| "loss": 0.0002, |
| "step": 355 |
| }, |
| { |
| "epoch": 3.63265306122449, |
| "grad_norm": 4.217746257781982, |
| "learning_rate": 1.9601743494694295e-05, |
| "loss": 0.0332, |
| "step": 356 |
| }, |
| { |
| "epoch": 3.642857142857143, |
| "grad_norm": 8.988768577575684, |
| "learning_rate": 1.959675227335497e-05, |
| "loss": 0.1531, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.6530612244897958, |
| "grad_norm": 0.8388896584510803, |
| "learning_rate": 1.9591730613269878e-05, |
| "loss": 0.0094, |
| "step": 358 |
| }, |
| { |
| "epoch": 3.663265306122449, |
| "grad_norm": 1.1567710638046265, |
| "learning_rate": 1.9586678530366607e-05, |
| "loss": 0.0141, |
| "step": 359 |
| }, |
| { |
| "epoch": 3.673469387755102, |
| "grad_norm": 0.5974036455154419, |
| "learning_rate": 1.9581596040669225e-05, |
| "loss": 0.005, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.683673469387755, |
| "grad_norm": 2.6495542526245117, |
| "learning_rate": 1.9576483160298246e-05, |
| "loss": 0.0292, |
| "step": 361 |
| }, |
| { |
| "epoch": 3.693877551020408, |
| "grad_norm": 9.135188102722168, |
| "learning_rate": 1.9571339905470587e-05, |
| "loss": 0.0856, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.704081632653061, |
| "grad_norm": 34.383392333984375, |
| "learning_rate": 1.9566166292499497e-05, |
| "loss": 0.5175, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.7142857142857144, |
| "grad_norm": 64.9215087890625, |
| "learning_rate": 1.956096233779451e-05, |
| "loss": 0.7858, |
| "step": 364 |
| }, |
| { |
| "epoch": 3.7244897959183674, |
| "grad_norm": 3.036076545715332, |
| "learning_rate": 1.955572805786141e-05, |
| "loss": 0.0228, |
| "step": 365 |
| }, |
| { |
| "epoch": 3.7346938775510203, |
| "grad_norm": 0.060048237442970276, |
| "learning_rate": 1.9550463469302156e-05, |
| "loss": 0.0007, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.7448979591836737, |
| "grad_norm": 14.525885581970215, |
| "learning_rate": 1.954516858881484e-05, |
| "loss": 0.1121, |
| "step": 367 |
| }, |
| { |
| "epoch": 3.7551020408163263, |
| "grad_norm": 0.025440210476517677, |
| "learning_rate": 1.953984343319364e-05, |
| "loss": 0.0003, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.7653061224489797, |
| "grad_norm": 16.654895782470703, |
| "learning_rate": 1.953448801932875e-05, |
| "loss": 0.1813, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.7755102040816326, |
| "grad_norm": 1.6078295707702637, |
| "learning_rate": 1.952910236420635e-05, |
| "loss": 0.0109, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.7857142857142856, |
| "grad_norm": 0.4291606545448303, |
| "learning_rate": 1.9523686484908523e-05, |
| "loss": 0.0042, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.795918367346939, |
| "grad_norm": 0.01769206114113331, |
| "learning_rate": 1.9518240398613226e-05, |
| "loss": 0.0002, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.806122448979592, |
| "grad_norm": 6.454762935638428, |
| "learning_rate": 1.951276412259422e-05, |
| "loss": 0.0645, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.816326530612245, |
| "grad_norm": 0.010405894368886948, |
| "learning_rate": 1.950725767422103e-05, |
| "loss": 0.0001, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.826530612244898, |
| "grad_norm": 0.08737888187170029, |
| "learning_rate": 1.9501721070958868e-05, |
| "loss": 0.0007, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.836734693877551, |
| "grad_norm": 0.008461426012217999, |
| "learning_rate": 1.9496154330368605e-05, |
| "loss": 0.0001, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.8469387755102042, |
| "grad_norm": 0.03447158262133598, |
| "learning_rate": 1.949055747010669e-05, |
| "loss": 0.0004, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.857142857142857, |
| "grad_norm": 0.08535359054803848, |
| "learning_rate": 1.9484930507925105e-05, |
| "loss": 0.0008, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.86734693877551, |
| "grad_norm": 7.384174346923828, |
| "learning_rate": 1.947927346167132e-05, |
| "loss": 0.0635, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.877551020408163, |
| "grad_norm": 0.10105583816766739, |
| "learning_rate": 1.9473586349288213e-05, |
| "loss": 0.0009, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.887755102040816, |
| "grad_norm": 35.104393005371094, |
| "learning_rate": 1.9467869188814024e-05, |
| "loss": 0.9885, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.8979591836734695, |
| "grad_norm": 3.9278883934020996, |
| "learning_rate": 1.946212199838231e-05, |
| "loss": 0.0363, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.9081632653061225, |
| "grad_norm": 10.301095962524414, |
| "learning_rate": 1.945634479622187e-05, |
| "loss": 0.144, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.9183673469387754, |
| "grad_norm": 42.96265411376953, |
| "learning_rate": 1.9450537600656688e-05, |
| "loss": 1.6117, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.928571428571429, |
| "grad_norm": 52.48064041137695, |
| "learning_rate": 1.9444700430105892e-05, |
| "loss": 0.6172, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.938775510204082, |
| "grad_norm": 1.2879438400268555, |
| "learning_rate": 1.9438833303083677e-05, |
| "loss": 0.0111, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.9489795918367347, |
| "grad_norm": 2.2308216094970703, |
| "learning_rate": 1.943293623819925e-05, |
| "loss": 0.0106, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.9591836734693877, |
| "grad_norm": 3.4741926193237305, |
| "learning_rate": 1.9427009254156783e-05, |
| "loss": 0.0252, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.9693877551020407, |
| "grad_norm": 2.1992008686065674, |
| "learning_rate": 1.9421052369755335e-05, |
| "loss": 0.0249, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.979591836734694, |
| "grad_norm": 6.670519828796387, |
| "learning_rate": 1.9415065603888813e-05, |
| "loss": 0.0537, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.989795918367347, |
| "grad_norm": 3.5247645378112793, |
| "learning_rate": 1.940904897554589e-05, |
| "loss": 0.0229, |
| "step": 391 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.005356221459805965, |
| "learning_rate": 1.940300250380996e-05, |
| "loss": 0.0001, |
| "step": 392 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_dim_128_cosine_accuracy@1": 0.3591549295774648, |
| "eval_dim_128_cosine_accuracy@10": 0.42189500640204863, |
| "eval_dim_128_cosine_accuracy@3": 0.3674775928297055, |
| "eval_dim_128_cosine_accuracy@5": 0.3879641485275288, |
| "eval_dim_128_cosine_map@100": 0.44021886383811665, |
| "eval_dim_128_cosine_mrr@10": 0.37021726317500914, |
| "eval_dim_128_cosine_ndcg@10": 0.38125088011930397, |
| "eval_dim_128_cosine_precision@1": 0.3591549295774648, |
| "eval_dim_128_cosine_precision@10": 0.3190140845070422, |
| "eval_dim_128_cosine_precision@3": 0.36022193768672645, |
| "eval_dim_128_cosine_precision@5": 0.35134443021766965, |
| "eval_dim_128_cosine_recall@1": 0.03915563315840565, |
| "eval_dim_128_cosine_recall@10": 0.25990239457273806, |
| "eval_dim_128_cosine_recall@3": 0.11553418576873567, |
| "eval_dim_128_cosine_recall@5": 0.17491652721444478, |
| "eval_dim_256_cosine_accuracy@1": 0.3994878361075544, |
| "eval_dim_256_cosine_accuracy@10": 0.46030729833546735, |
| "eval_dim_256_cosine_accuracy@3": 0.4046094750320102, |
| "eval_dim_256_cosine_accuracy@5": 0.43021766965428937, |
| "eval_dim_256_cosine_map@100": 0.4793333960316136, |
| "eval_dim_256_cosine_mrr@10": 0.40997398532609775, |
| "eval_dim_256_cosine_ndcg@10": 0.42036202842456427, |
| "eval_dim_256_cosine_precision@1": 0.3994878361075544, |
| "eval_dim_256_cosine_precision@10": 0.354865556978233, |
| "eval_dim_256_cosine_precision@3": 0.3990610328638497, |
| "eval_dim_256_cosine_precision@5": 0.3897567221510883, |
| "eval_dim_256_cosine_recall@1": 0.042270750855143924, |
| "eval_dim_256_cosine_recall@10": 0.27590960778633977, |
| "eval_dim_256_cosine_recall@3": 0.12430640307454709, |
| "eval_dim_256_cosine_recall@5": 0.18776586614822713, |
| "eval_dim_512_cosine_accuracy@1": 0.4084507042253521, |
| "eval_dim_512_cosine_accuracy@10": 0.47247119078104993, |
| "eval_dim_512_cosine_accuracy@3": 0.4148527528809219, |
| "eval_dim_512_cosine_accuracy@5": 0.43982074263764404, |
| "eval_dim_512_cosine_map@100": 0.49048832795758607, |
| "eval_dim_512_cosine_mrr@10": 0.41940684511107007, |
| "eval_dim_512_cosine_ndcg@10": 0.4302962824455912, |
| "eval_dim_512_cosine_precision@1": 0.4084507042253521, |
| "eval_dim_512_cosine_precision@10": 0.3646606914212548, |
| "eval_dim_512_cosine_precision@3": 0.4082373026034998, |
| "eval_dim_512_cosine_precision@5": 0.3991037131882202, |
| "eval_dim_512_cosine_recall@1": 0.04301931947001461, |
| "eval_dim_512_cosine_recall@10": 0.2849750279222966, |
| "eval_dim_512_cosine_recall@3": 0.12607664541552657, |
| "eval_dim_512_cosine_recall@5": 0.19078237285443875, |
| "eval_dim_64_cosine_accuracy@1": 0.3111395646606914, |
| "eval_dim_64_cosine_accuracy@10": 0.3681177976952625, |
| "eval_dim_64_cosine_accuracy@3": 0.31690140845070425, |
| "eval_dim_64_cosine_accuracy@5": 0.3386683738796415, |
| "eval_dim_64_cosine_map@100": 0.3862030037114511, |
| "eval_dim_64_cosine_mrr@10": 0.3210490214011337, |
| "eval_dim_64_cosine_ndcg@10": 0.33142044222977946, |
| "eval_dim_64_cosine_precision@1": 0.3111395646606914, |
| "eval_dim_64_cosine_precision@10": 0.2779129321382843, |
| "eval_dim_64_cosine_precision@3": 0.3113529662825437, |
| "eval_dim_64_cosine_precision@5": 0.30435339308578746, |
| "eval_dim_64_cosine_recall@1": 0.033659624850547756, |
| "eval_dim_64_cosine_recall@10": 0.22914905789384257, |
| "eval_dim_64_cosine_recall@3": 0.09974008986215437, |
| "eval_dim_64_cosine_recall@5": 0.15190516916204871, |
| "eval_dim_768_cosine_accuracy@1": 0.4199743918053777, |
| "eval_dim_768_cosine_accuracy@10": 0.4910371318822023, |
| "eval_dim_768_cosine_accuracy@3": 0.42509603072983354, |
| "eval_dim_768_cosine_accuracy@5": 0.4526248399487836, |
| "eval_dim_768_cosine_map@100": 0.5040293251721872, |
| "eval_dim_768_cosine_mrr@10": 0.431798365953295, |
| "eval_dim_768_cosine_ndcg@10": 0.4437524319661329, |
| "eval_dim_768_cosine_precision@1": 0.4199743918053777, |
| "eval_dim_768_cosine_precision@10": 0.3774647887323943, |
| "eval_dim_768_cosine_precision@3": 0.4193341869398207, |
| "eval_dim_768_cosine_precision@5": 0.4102432778489116, |
| "eval_dim_768_cosine_recall@1": 0.043926224917091056, |
| "eval_dim_768_cosine_recall@10": 0.2942224009225615, |
| "eval_dim_768_cosine_recall@3": 0.12872931800473333, |
| "eval_dim_768_cosine_recall@5": 0.19528328675222623, |
| "eval_runtime": 183.3541, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.33142044222977946, |
| "eval_steps_per_second": 0.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 4.010204081632653, |
| "grad_norm": 153.88648986816406, |
| "learning_rate": 1.9396926207859085e-05, |
| "loss": 0.2346, |
| "step": 393 |
| }, |
| { |
| "epoch": 4.020408163265306, |
| "grad_norm": 0.7419346570968628, |
| "learning_rate": 1.9390820106965908e-05, |
| "loss": 0.0079, |
| "step": 394 |
| }, |
| { |
| "epoch": 4.030612244897959, |
| "grad_norm": 0.6101226210594177, |
| "learning_rate": 1.9384684220497605e-05, |
| "loss": 0.0058, |
| "step": 395 |
| }, |
| { |
| "epoch": 4.040816326530612, |
| "grad_norm": 0.44310683012008667, |
| "learning_rate": 1.9378518567915842e-05, |
| "loss": 0.0035, |
| "step": 396 |
| }, |
| { |
| "epoch": 4.051020408163265, |
| "grad_norm": 0.04433823004364967, |
| "learning_rate": 1.937232316877668e-05, |
| "loss": 0.0002, |
| "step": 397 |
| }, |
| { |
| "epoch": 4.061224489795919, |
| "grad_norm": 6.246387481689453, |
| "learning_rate": 1.9366098042730534e-05, |
| "loss": 0.028, |
| "step": 398 |
| }, |
| { |
| "epoch": 4.071428571428571, |
| "grad_norm": 0.004334597382694483, |
| "learning_rate": 1.9359843209522112e-05, |
| "loss": 0.0001, |
| "step": 399 |
| }, |
| { |
| "epoch": 4.081632653061225, |
| "grad_norm": 0.022266261279582977, |
| "learning_rate": 1.935355868899034e-05, |
| "loss": 0.0003, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.091836734693878, |
| "grad_norm": 1.131543517112732, |
| "learning_rate": 1.934724450106831e-05, |
| "loss": 0.0121, |
| "step": 401 |
| }, |
| { |
| "epoch": 4.1020408163265305, |
| "grad_norm": 10.245617866516113, |
| "learning_rate": 1.934090066578321e-05, |
| "loss": 0.1073, |
| "step": 402 |
| }, |
| { |
| "epoch": 4.112244897959184, |
| "grad_norm": 0.15691065788269043, |
| "learning_rate": 1.933452720325626e-05, |
| "loss": 0.0012, |
| "step": 403 |
| }, |
| { |
| "epoch": 4.122448979591836, |
| "grad_norm": 0.028827032074332237, |
| "learning_rate": 1.932812413370265e-05, |
| "loss": 0.0003, |
| "step": 404 |
| }, |
| { |
| "epoch": 4.13265306122449, |
| "grad_norm": 0.442047119140625, |
| "learning_rate": 1.9321691477431487e-05, |
| "loss": 0.0025, |
| "step": 405 |
| }, |
| { |
| "epoch": 4.142857142857143, |
| "grad_norm": 1.5630896091461182, |
| "learning_rate": 1.9315229254845712e-05, |
| "loss": 0.0097, |
| "step": 406 |
| }, |
| { |
| "epoch": 4.153061224489796, |
| "grad_norm": 1.185296893119812, |
| "learning_rate": 1.9308737486442045e-05, |
| "loss": 0.0127, |
| "step": 407 |
| }, |
| { |
| "epoch": 4.163265306122449, |
| "grad_norm": 0.004454934503883123, |
| "learning_rate": 1.930221619281092e-05, |
| "loss": 0.0001, |
| "step": 408 |
| }, |
| { |
| "epoch": 4.173469387755102, |
| "grad_norm": 0.5739359259605408, |
| "learning_rate": 1.9295665394636414e-05, |
| "loss": 0.007, |
| "step": 409 |
| }, |
| { |
| "epoch": 4.183673469387755, |
| "grad_norm": 0.8776015639305115, |
| "learning_rate": 1.92890851126962e-05, |
| "loss": 0.0154, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.1938775510204085, |
| "grad_norm": 0.010717377066612244, |
| "learning_rate": 1.9282475367861444e-05, |
| "loss": 0.0002, |
| "step": 411 |
| }, |
| { |
| "epoch": 4.204081632653061, |
| "grad_norm": 2.1066489219665527, |
| "learning_rate": 1.927583618109678e-05, |
| "loss": 0.0207, |
| "step": 412 |
| }, |
| { |
| "epoch": 4.214285714285714, |
| "grad_norm": 8.608527183532715, |
| "learning_rate": 1.926916757346022e-05, |
| "loss": 0.0682, |
| "step": 413 |
| }, |
| { |
| "epoch": 4.224489795918367, |
| "grad_norm": 11.61573314666748, |
| "learning_rate": 1.926246956610309e-05, |
| "loss": 0.1168, |
| "step": 414 |
| }, |
| { |
| "epoch": 4.23469387755102, |
| "grad_norm": 0.3539029657840729, |
| "learning_rate": 1.9255742180269967e-05, |
| "loss": 0.0019, |
| "step": 415 |
| }, |
| { |
| "epoch": 4.244897959183674, |
| "grad_norm": 95.681884765625, |
| "learning_rate": 1.924898543729861e-05, |
| "loss": 1.7119, |
| "step": 416 |
| }, |
| { |
| "epoch": 4.255102040816326, |
| "grad_norm": 0.0037097211461514235, |
| "learning_rate": 1.9242199358619897e-05, |
| "loss": 0.0001, |
| "step": 417 |
| }, |
| { |
| "epoch": 4.26530612244898, |
| "grad_norm": 0.029390670359134674, |
| "learning_rate": 1.923538396575774e-05, |
| "loss": 0.0004, |
| "step": 418 |
| }, |
| { |
| "epoch": 4.275510204081632, |
| "grad_norm": 158.73011779785156, |
| "learning_rate": 1.922853928032904e-05, |
| "loss": 3.5151, |
| "step": 419 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 350.69512939453125, |
| "learning_rate": 1.92216653240436e-05, |
| "loss": 7.6674, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.295918367346939, |
| "grad_norm": 94.87379455566406, |
| "learning_rate": 1.921476211870408e-05, |
| "loss": 2.1193, |
| "step": 421 |
| }, |
| { |
| "epoch": 4.3061224489795915, |
| "grad_norm": 101.0951919555664, |
| "learning_rate": 1.9207829686205882e-05, |
| "loss": 1.1982, |
| "step": 422 |
| }, |
| { |
| "epoch": 4.316326530612245, |
| "grad_norm": 0.10624423623085022, |
| "learning_rate": 1.920086804853714e-05, |
| "loss": 0.0018, |
| "step": 423 |
| }, |
| { |
| "epoch": 4.326530612244898, |
| "grad_norm": 0.08854330331087112, |
| "learning_rate": 1.9193877227778604e-05, |
| "loss": 0.0008, |
| "step": 424 |
| }, |
| { |
| "epoch": 4.336734693877551, |
| "grad_norm": 5.69531774520874, |
| "learning_rate": 1.9186857246103586e-05, |
| "loss": 0.0581, |
| "step": 425 |
| }, |
| { |
| "epoch": 4.346938775510204, |
| "grad_norm": 2.591644287109375, |
| "learning_rate": 1.91798081257779e-05, |
| "loss": 0.0319, |
| "step": 426 |
| }, |
| { |
| "epoch": 4.357142857142857, |
| "grad_norm": 0.3840072751045227, |
| "learning_rate": 1.917272988915976e-05, |
| "loss": 0.0041, |
| "step": 427 |
| }, |
| { |
| "epoch": 4.36734693877551, |
| "grad_norm": 0.0030958654824644327, |
| "learning_rate": 1.9165622558699763e-05, |
| "loss": 0.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 4.377551020408164, |
| "grad_norm": 0.008273592218756676, |
| "learning_rate": 1.915848615694076e-05, |
| "loss": 0.0001, |
| "step": 429 |
| }, |
| { |
| "epoch": 4.387755102040816, |
| "grad_norm": 0.043385185301303864, |
| "learning_rate": 1.9151320706517814e-05, |
| "loss": 0.0005, |
| "step": 430 |
| }, |
| { |
| "epoch": 4.3979591836734695, |
| "grad_norm": 0.022923173382878304, |
| "learning_rate": 1.9144126230158127e-05, |
| "loss": 0.0002, |
| "step": 431 |
| }, |
| { |
| "epoch": 4.408163265306122, |
| "grad_norm": 0.14256472885608673, |
| "learning_rate": 1.913690275068097e-05, |
| "loss": 0.0012, |
| "step": 432 |
| }, |
| { |
| "epoch": 4.418367346938775, |
| "grad_norm": 5.968089580535889, |
| "learning_rate": 1.912965029099759e-05, |
| "loss": 0.0395, |
| "step": 433 |
| }, |
| { |
| "epoch": 4.428571428571429, |
| "grad_norm": 0.10840898007154465, |
| "learning_rate": 1.9122368874111172e-05, |
| "loss": 0.001, |
| "step": 434 |
| }, |
| { |
| "epoch": 4.438775510204081, |
| "grad_norm": 0.07155577838420868, |
| "learning_rate": 1.9115058523116734e-05, |
| "loss": 0.0006, |
| "step": 435 |
| }, |
| { |
| "epoch": 4.448979591836735, |
| "grad_norm": 1.8904563188552856, |
| "learning_rate": 1.9107719261201066e-05, |
| "loss": 0.0262, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.459183673469388, |
| "grad_norm": 97.97481536865234, |
| "learning_rate": 1.9100351111642666e-05, |
| "loss": 4.1211, |
| "step": 437 |
| }, |
| { |
| "epoch": 4.469387755102041, |
| "grad_norm": 1.083544373512268, |
| "learning_rate": 1.9092954097811654e-05, |
| "loss": 0.0119, |
| "step": 438 |
| }, |
| { |
| "epoch": 4.479591836734694, |
| "grad_norm": 0.060115616768598557, |
| "learning_rate": 1.908552824316969e-05, |
| "loss": 0.0006, |
| "step": 439 |
| }, |
| { |
| "epoch": 4.489795918367347, |
| "grad_norm": 8.608622550964355, |
| "learning_rate": 1.9078073571269922e-05, |
| "loss": 0.0865, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.1251484751701355, |
| "learning_rate": 1.90705901057569e-05, |
| "loss": 0.0007, |
| "step": 441 |
| }, |
| { |
| "epoch": 4.510204081632653, |
| "grad_norm": 0.1283632069826126, |
| "learning_rate": 1.9063077870366504e-05, |
| "loss": 0.0011, |
| "step": 442 |
| }, |
| { |
| "epoch": 4.520408163265306, |
| "grad_norm": 10.08908462524414, |
| "learning_rate": 1.9055536888925844e-05, |
| "loss": 0.0804, |
| "step": 443 |
| }, |
| { |
| "epoch": 4.530612244897959, |
| "grad_norm": 4.281910419464111, |
| "learning_rate": 1.9047967185353236e-05, |
| "loss": 0.0596, |
| "step": 444 |
| }, |
| { |
| "epoch": 4.540816326530612, |
| "grad_norm": 0.049331195652484894, |
| "learning_rate": 1.9040368783658075e-05, |
| "loss": 0.0006, |
| "step": 445 |
| }, |
| { |
| "epoch": 4.551020408163265, |
| "grad_norm": 0.18595637381076813, |
| "learning_rate": 1.903274170794079e-05, |
| "loss": 0.0019, |
| "step": 446 |
| }, |
| { |
| "epoch": 4.561224489795919, |
| "grad_norm": 44.52228927612305, |
| "learning_rate": 1.9025085982392753e-05, |
| "loss": 0.5596, |
| "step": 447 |
| }, |
| { |
| "epoch": 4.571428571428571, |
| "grad_norm": 0.19094879925251007, |
| "learning_rate": 1.9017401631296208e-05, |
| "loss": 0.0018, |
| "step": 448 |
| }, |
| { |
| "epoch": 4.581632653061225, |
| "grad_norm": 3.7835676670074463, |
| "learning_rate": 1.900968867902419e-05, |
| "loss": 0.0379, |
| "step": 449 |
| }, |
| { |
| "epoch": 4.591836734693878, |
| "grad_norm": 0.6503361463546753, |
| "learning_rate": 1.9001947150040462e-05, |
| "loss": 0.0076, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.6020408163265305, |
| "grad_norm": 0.1325170397758484, |
| "learning_rate": 1.8994177068899414e-05, |
| "loss": 0.0012, |
| "step": 451 |
| }, |
| { |
| "epoch": 4.612244897959184, |
| "grad_norm": 0.0501476414501667, |
| "learning_rate": 1.8986378460246e-05, |
| "loss": 0.0006, |
| "step": 452 |
| }, |
| { |
| "epoch": 4.622448979591836, |
| "grad_norm": 83.80696868896484, |
| "learning_rate": 1.8978551348815653e-05, |
| "loss": 0.6476, |
| "step": 453 |
| }, |
| { |
| "epoch": 4.63265306122449, |
| "grad_norm": 0.006705591455101967, |
| "learning_rate": 1.897069575943422e-05, |
| "loss": 0.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 4.642857142857143, |
| "grad_norm": 4.572281837463379, |
| "learning_rate": 1.896281171701787e-05, |
| "loss": 0.0214, |
| "step": 455 |
| }, |
| { |
| "epoch": 4.653061224489796, |
| "grad_norm": 0.03425569087266922, |
| "learning_rate": 1.895489924657301e-05, |
| "loss": 0.0005, |
| "step": 456 |
| }, |
| { |
| "epoch": 4.663265306122449, |
| "grad_norm": 219.26651000976562, |
| "learning_rate": 1.894695837319623e-05, |
| "loss": 4.8527, |
| "step": 457 |
| }, |
| { |
| "epoch": 4.673469387755102, |
| "grad_norm": 39.20528793334961, |
| "learning_rate": 1.8938989122074195e-05, |
| "loss": 0.4774, |
| "step": 458 |
| }, |
| { |
| "epoch": 4.683673469387755, |
| "grad_norm": 0.02694033645093441, |
| "learning_rate": 1.8930991518483586e-05, |
| "loss": 0.0003, |
| "step": 459 |
| }, |
| { |
| "epoch": 4.6938775510204085, |
| "grad_norm": 0.008928623050451279, |
| "learning_rate": 1.8922965587791e-05, |
| "loss": 0.0001, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.704081632653061, |
| "grad_norm": 0.7024058103561401, |
| "learning_rate": 1.8914911355452895e-05, |
| "loss": 0.0075, |
| "step": 461 |
| }, |
| { |
| "epoch": 4.714285714285714, |
| "grad_norm": 0.0062184385024011135, |
| "learning_rate": 1.890682884701549e-05, |
| "loss": 0.0001, |
| "step": 462 |
| }, |
| { |
| "epoch": 4.724489795918368, |
| "grad_norm": 265.30419921875, |
| "learning_rate": 1.8898718088114688e-05, |
| "loss": 7.4959, |
| "step": 463 |
| }, |
| { |
| "epoch": 4.73469387755102, |
| "grad_norm": 0.0009059436270035803, |
| "learning_rate": 1.8890579104475996e-05, |
| "loss": 0.0, |
| "step": 464 |
| }, |
| { |
| "epoch": 4.744897959183674, |
| "grad_norm": 58.41782760620117, |
| "learning_rate": 1.8882411921914442e-05, |
| "loss": 2.1102, |
| "step": 465 |
| }, |
| { |
| "epoch": 4.755102040816326, |
| "grad_norm": 0.17638947069644928, |
| "learning_rate": 1.8874216566334502e-05, |
| "loss": 0.0027, |
| "step": 466 |
| }, |
| { |
| "epoch": 4.76530612244898, |
| "grad_norm": 0.30780017375946045, |
| "learning_rate": 1.8865993063730003e-05, |
| "loss": 0.0035, |
| "step": 467 |
| }, |
| { |
| "epoch": 4.775510204081632, |
| "grad_norm": 41.008460998535156, |
| "learning_rate": 1.885774144018405e-05, |
| "loss": 0.574, |
| "step": 468 |
| }, |
| { |
| "epoch": 4.785714285714286, |
| "grad_norm": 1.381316900253296, |
| "learning_rate": 1.8849461721868948e-05, |
| "loss": 0.0191, |
| "step": 469 |
| }, |
| { |
| "epoch": 4.795918367346939, |
| "grad_norm": 2.717698574066162, |
| "learning_rate": 1.8841153935046098e-05, |
| "loss": 0.0214, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.8061224489795915, |
| "grad_norm": 0.2837541401386261, |
| "learning_rate": 1.8832818106065943e-05, |
| "loss": 0.0016, |
| "step": 471 |
| }, |
| { |
| "epoch": 4.816326530612245, |
| "grad_norm": 0.03325734660029411, |
| "learning_rate": 1.8824454261367862e-05, |
| "loss": 0.0003, |
| "step": 472 |
| }, |
| { |
| "epoch": 4.826530612244898, |
| "grad_norm": 0.0347367525100708, |
| "learning_rate": 1.881606242748009e-05, |
| "loss": 0.0003, |
| "step": 473 |
| }, |
| { |
| "epoch": 4.836734693877551, |
| "grad_norm": 0.7267290353775024, |
| "learning_rate": 1.8807642631019648e-05, |
| "loss": 0.0038, |
| "step": 474 |
| }, |
| { |
| "epoch": 4.846938775510204, |
| "grad_norm": 0.002910307375714183, |
| "learning_rate": 1.8799194898692238e-05, |
| "loss": 0.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 4.857142857142857, |
| "grad_norm": 49.9831428527832, |
| "learning_rate": 1.8790719257292175e-05, |
| "loss": 0.4292, |
| "step": 476 |
| }, |
| { |
| "epoch": 4.86734693877551, |
| "grad_norm": 0.09869600087404251, |
| "learning_rate": 1.8782215733702286e-05, |
| "loss": 0.0009, |
| "step": 477 |
| }, |
| { |
| "epoch": 4.877551020408164, |
| "grad_norm": 4.552360534667969, |
| "learning_rate": 1.8773684354893848e-05, |
| "loss": 0.041, |
| "step": 478 |
| }, |
| { |
| "epoch": 4.887755102040816, |
| "grad_norm": 8.773714065551758, |
| "learning_rate": 1.8765125147926477e-05, |
| "loss": 0.0909, |
| "step": 479 |
| }, |
| { |
| "epoch": 4.8979591836734695, |
| "grad_norm": 0.24354512989521027, |
| "learning_rate": 1.875653813994806e-05, |
| "loss": 0.0024, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.908163265306122, |
| "grad_norm": 0.005503151565790176, |
| "learning_rate": 1.874792335819466e-05, |
| "loss": 0.0001, |
| "step": 481 |
| }, |
| { |
| "epoch": 4.918367346938775, |
| "grad_norm": 150.60595703125, |
| "learning_rate": 1.873928082999043e-05, |
| "loss": 0.3607, |
| "step": 482 |
| }, |
| { |
| "epoch": 4.928571428571429, |
| "grad_norm": 44.48455047607422, |
| "learning_rate": 1.8730610582747538e-05, |
| "loss": 0.994, |
| "step": 483 |
| }, |
| { |
| "epoch": 4.938775510204081, |
| "grad_norm": 1.845575213432312, |
| "learning_rate": 1.8721912643966055e-05, |
| "loss": 0.0186, |
| "step": 484 |
| }, |
| { |
| "epoch": 4.948979591836735, |
| "grad_norm": 12.332234382629395, |
| "learning_rate": 1.8713187041233896e-05, |
| "loss": 0.206, |
| "step": 485 |
| }, |
| { |
| "epoch": 4.959183673469388, |
| "grad_norm": 0.08337131142616272, |
| "learning_rate": 1.8704433802226714e-05, |
| "loss": 0.0008, |
| "step": 486 |
| }, |
| { |
| "epoch": 4.969387755102041, |
| "grad_norm": 0.053039923310279846, |
| "learning_rate": 1.8695652954707823e-05, |
| "loss": 0.0006, |
| "step": 487 |
| }, |
| { |
| "epoch": 4.979591836734694, |
| "grad_norm": 22.981956481933594, |
| "learning_rate": 1.86868445265281e-05, |
| "loss": 0.2176, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.989795918367347, |
| "grad_norm": 27.581205368041992, |
| "learning_rate": 1.86780085456259e-05, |
| "loss": 0.2219, |
| "step": 489 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.5270394086837769, |
| "learning_rate": 1.866914504002698e-05, |
| "loss": 0.0112, |
| "step": 490 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_dim_128_cosine_accuracy@1": 0.35979513444302175, |
| "eval_dim_128_cosine_accuracy@10": 0.4238156209987196, |
| "eval_dim_128_cosine_accuracy@3": 0.3687580025608195, |
| "eval_dim_128_cosine_accuracy@5": 0.39180537772087065, |
| "eval_dim_128_cosine_map@100": 0.437074453724746, |
| "eval_dim_128_cosine_mrr@10": 0.37126293112208564, |
| "eval_dim_128_cosine_ndcg@10": 0.38254301379946687, |
| "eval_dim_128_cosine_precision@1": 0.35979513444302175, |
| "eval_dim_128_cosine_precision@10": 0.32528809218950067, |
| "eval_dim_128_cosine_precision@3": 0.36022193768672645, |
| "eval_dim_128_cosine_precision@5": 0.353393085787452, |
| "eval_dim_128_cosine_recall@1": 0.037743801580456406, |
| "eval_dim_128_cosine_recall@10": 0.2526151629138788, |
| "eval_dim_128_cosine_recall@3": 0.11036730376509347, |
| "eval_dim_128_cosine_recall@5": 0.16748863189789, |
| "eval_dim_256_cosine_accuracy@1": 0.4001280409731114, |
| "eval_dim_256_cosine_accuracy@10": 0.4532650448143406, |
| "eval_dim_256_cosine_accuracy@3": 0.4039692701664533, |
| "eval_dim_256_cosine_accuracy@5": 0.42509603072983354, |
| "eval_dim_256_cosine_map@100": 0.46849072213833953, |
| "eval_dim_256_cosine_mrr@10": 0.4091183464422898, |
| "eval_dim_256_cosine_ndcg@10": 0.4189536617481397, |
| "eval_dim_256_cosine_precision@1": 0.4001280409731114, |
| "eval_dim_256_cosine_precision@10": 0.35627400768245837, |
| "eval_dim_256_cosine_precision@3": 0.3990610328638497, |
| "eval_dim_256_cosine_precision@5": 0.3896286811779769, |
| "eval_dim_256_cosine_recall@1": 0.040728817488916956, |
| "eval_dim_256_cosine_recall@10": 0.27123568020124944, |
| "eval_dim_256_cosine_recall@3": 0.11918215775911108, |
| "eval_dim_256_cosine_recall@5": 0.1806338192919952, |
| "eval_dim_512_cosine_accuracy@1": 0.40973111395646605, |
| "eval_dim_512_cosine_accuracy@10": 0.46350832266325226, |
| "eval_dim_512_cosine_accuracy@3": 0.41293213828425096, |
| "eval_dim_512_cosine_accuracy@5": 0.43918053777208704, |
| "eval_dim_512_cosine_map@100": 0.4812812064576608, |
| "eval_dim_512_cosine_mrr@10": 0.4190039225250491, |
| "eval_dim_512_cosine_ndcg@10": 0.42914983827992026, |
| "eval_dim_512_cosine_precision@1": 0.40973111395646605, |
| "eval_dim_512_cosine_precision@10": 0.3639564660691421, |
| "eval_dim_512_cosine_precision@3": 0.4080239009816474, |
| "eval_dim_512_cosine_precision@5": 0.3991037131882202, |
| "eval_dim_512_cosine_recall@1": 0.04220302608677807, |
| "eval_dim_512_cosine_recall@10": 0.2804441921081484, |
| "eval_dim_512_cosine_recall@3": 0.1232911911396302, |
| "eval_dim_512_cosine_recall@5": 0.18736557372627924, |
| "eval_dim_64_cosine_accuracy@1": 0.31498079385403327, |
| "eval_dim_64_cosine_accuracy@10": 0.3732394366197183, |
| "eval_dim_64_cosine_accuracy@3": 0.31882202304737517, |
| "eval_dim_64_cosine_accuracy@5": 0.34314980793854033, |
| "eval_dim_64_cosine_map@100": 0.3879101681453684, |
| "eval_dim_64_cosine_mrr@10": 0.32475204763937116, |
| "eval_dim_64_cosine_ndcg@10": 0.3352059384790271, |
| "eval_dim_64_cosine_precision@1": 0.31498079385403327, |
| "eval_dim_64_cosine_precision@10": 0.2811779769526248, |
| "eval_dim_64_cosine_precision@3": 0.3147673922321809, |
| "eval_dim_64_cosine_precision@5": 0.30742637644046095, |
| "eval_dim_64_cosine_recall@1": 0.03382134135767763, |
| "eval_dim_64_cosine_recall@10": 0.22782511684449083, |
| "eval_dim_64_cosine_recall@3": 0.09979477196363994, |
| "eval_dim_64_cosine_recall@5": 0.1517937835977082, |
| "eval_dim_768_cosine_accuracy@1": 0.41613316261203587, |
| "eval_dim_768_cosine_accuracy@10": 0.4731113956466069, |
| "eval_dim_768_cosine_accuracy@3": 0.4193341869398207, |
| "eval_dim_768_cosine_accuracy@5": 0.44366197183098594, |
| "eval_dim_768_cosine_map@100": 0.4890243769832116, |
| "eval_dim_768_cosine_mrr@10": 0.4256112432168768, |
| "eval_dim_768_cosine_ndcg@10": 0.43491019147566995, |
| "eval_dim_768_cosine_precision@1": 0.41613316261203587, |
| "eval_dim_768_cosine_precision@10": 0.36984635083226636, |
| "eval_dim_768_cosine_precision@3": 0.4148527528809219, |
| "eval_dim_768_cosine_precision@5": 0.4051216389244558, |
| "eval_dim_768_cosine_recall@1": 0.04254250894522427, |
| "eval_dim_768_cosine_recall@10": 0.28288145399591047, |
| "eval_dim_768_cosine_recall@3": 0.1244727395259559, |
| "eval_dim_768_cosine_recall@5": 0.1886213008910836, |
| "eval_runtime": 183.201, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.3352059384790271, |
| "eval_steps_per_second": 0.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 5.010204081632653, |
| "grad_norm": 0.05376085638999939, |
| "learning_rate": 1.866025403784439e-05, |
| "loss": 0.0005, |
| "step": 491 |
| }, |
| { |
| "epoch": 5.020408163265306, |
| "grad_norm": 0.16362005472183228, |
| "learning_rate": 1.865133556727839e-05, |
| "loss": 0.0016, |
| "step": 492 |
| }, |
| { |
| "epoch": 5.030612244897959, |
| "grad_norm": 0.9914959669113159, |
| "learning_rate": 1.864238965661637e-05, |
| "loss": 0.0091, |
| "step": 493 |
| }, |
| { |
| "epoch": 5.040816326530612, |
| "grad_norm": 30.57797622680664, |
| "learning_rate": 1.8633416334232754e-05, |
| "loss": 0.0467, |
| "step": 494 |
| }, |
| { |
| "epoch": 5.051020408163265, |
| "grad_norm": 2.5222413539886475, |
| "learning_rate": 1.862441562858891e-05, |
| "loss": 0.0229, |
| "step": 495 |
| }, |
| { |
| "epoch": 5.061224489795919, |
| "grad_norm": 0.0006655550096184015, |
| "learning_rate": 1.861538756823305e-05, |
| "loss": 0.0, |
| "step": 496 |
| }, |
| { |
| "epoch": 5.071428571428571, |
| "grad_norm": 0.11231075972318649, |
| "learning_rate": 1.8606332181800165e-05, |
| "loss": 0.0014, |
| "step": 497 |
| }, |
| { |
| "epoch": 5.081632653061225, |
| "grad_norm": 0.2929253876209259, |
| "learning_rate": 1.8597249498011906e-05, |
| "loss": 0.0045, |
| "step": 498 |
| }, |
| { |
| "epoch": 5.091836734693878, |
| "grad_norm": 0.013004861772060394, |
| "learning_rate": 1.8588139545676506e-05, |
| "loss": 0.0002, |
| "step": 499 |
| }, |
| { |
| "epoch": 5.1020408163265305, |
| "grad_norm": 10.929536819458008, |
| "learning_rate": 1.8579002353688695e-05, |
| "loss": 0.105, |
| "step": 500 |
| }, |
| { |
| "epoch": 5.112244897959184, |
| "grad_norm": 0.0029807123355567455, |
| "learning_rate": 1.8569837951029597e-05, |
| "loss": 0.0, |
| "step": 501 |
| }, |
| { |
| "epoch": 5.122448979591836, |
| "grad_norm": 0.6099005937576294, |
| "learning_rate": 1.8560646366766637e-05, |
| "loss": 0.0063, |
| "step": 502 |
| }, |
| { |
| "epoch": 5.13265306122449, |
| "grad_norm": 2.8765928745269775, |
| "learning_rate": 1.8551427630053464e-05, |
| "loss": 0.0242, |
| "step": 503 |
| }, |
| { |
| "epoch": 5.142857142857143, |
| "grad_norm": 0.00245065544731915, |
| "learning_rate": 1.8542181770129838e-05, |
| "loss": 0.0, |
| "step": 504 |
| }, |
| { |
| "epoch": 5.153061224489796, |
| "grad_norm": 0.3614043891429901, |
| "learning_rate": 1.8532908816321557e-05, |
| "loss": 0.0033, |
| "step": 505 |
| }, |
| { |
| "epoch": 5.163265306122449, |
| "grad_norm": 0.04071643948554993, |
| "learning_rate": 1.852360879804035e-05, |
| "loss": 0.0004, |
| "step": 506 |
| }, |
| { |
| "epoch": 5.173469387755102, |
| "grad_norm": 0.2372654378414154, |
| "learning_rate": 1.851428174478379e-05, |
| "loss": 0.0014, |
| "step": 507 |
| }, |
| { |
| "epoch": 5.183673469387755, |
| "grad_norm": 0.3527073264122009, |
| "learning_rate": 1.8504927686135194e-05, |
| "loss": 0.0027, |
| "step": 508 |
| }, |
| { |
| "epoch": 5.1938775510204085, |
| "grad_norm": 198.38519287109375, |
| "learning_rate": 1.849554665176354e-05, |
| "loss": 2.3163, |
| "step": 509 |
| }, |
| { |
| "epoch": 5.204081632653061, |
| "grad_norm": 20.711875915527344, |
| "learning_rate": 1.8486138671423366e-05, |
| "loss": 0.5547, |
| "step": 510 |
| }, |
| { |
| "epoch": 5.214285714285714, |
| "grad_norm": 3.5171825885772705, |
| "learning_rate": 1.8476703774954676e-05, |
| "loss": 0.0802, |
| "step": 511 |
| }, |
| { |
| "epoch": 5.224489795918367, |
| "grad_norm": 0.10855702310800552, |
| "learning_rate": 1.8467241992282842e-05, |
| "loss": 0.0011, |
| "step": 512 |
| }, |
| { |
| "epoch": 5.23469387755102, |
| "grad_norm": 0.008565492928028107, |
| "learning_rate": 1.845775335341852e-05, |
| "loss": 0.0001, |
| "step": 513 |
| }, |
| { |
| "epoch": 5.244897959183674, |
| "grad_norm": 4.928635597229004, |
| "learning_rate": 1.8448237888457546e-05, |
| "loss": 0.0109, |
| "step": 514 |
| }, |
| { |
| "epoch": 5.255102040816326, |
| "grad_norm": 0.488430380821228, |
| "learning_rate": 1.8438695627580832e-05, |
| "loss": 0.0044, |
| "step": 515 |
| }, |
| { |
| "epoch": 5.26530612244898, |
| "grad_norm": 0.40208637714385986, |
| "learning_rate": 1.8429126601054302e-05, |
| "loss": 0.0036, |
| "step": 516 |
| }, |
| { |
| "epoch": 5.275510204081632, |
| "grad_norm": 0.18485486507415771, |
| "learning_rate": 1.841953083922875e-05, |
| "loss": 0.0018, |
| "step": 517 |
| }, |
| { |
| "epoch": 5.285714285714286, |
| "grad_norm": 0.9628912210464478, |
| "learning_rate": 1.8409908372539788e-05, |
| "loss": 0.0073, |
| "step": 518 |
| }, |
| { |
| "epoch": 5.295918367346939, |
| "grad_norm": 0.21243244409561157, |
| "learning_rate": 1.8400259231507716e-05, |
| "loss": 0.0025, |
| "step": 519 |
| }, |
| { |
| "epoch": 5.3061224489795915, |
| "grad_norm": 0.005211708135902882, |
| "learning_rate": 1.8390583446737448e-05, |
| "loss": 0.0001, |
| "step": 520 |
| }, |
| { |
| "epoch": 5.316326530612245, |
| "grad_norm": 0.21359172463417053, |
| "learning_rate": 1.8380881048918406e-05, |
| "loss": 0.0031, |
| "step": 521 |
| }, |
| { |
| "epoch": 5.326530612244898, |
| "grad_norm": 64.16328430175781, |
| "learning_rate": 1.837115206882442e-05, |
| "loss": 0.1512, |
| "step": 522 |
| }, |
| { |
| "epoch": 5.336734693877551, |
| "grad_norm": 0.006268950179219246, |
| "learning_rate": 1.8361396537313628e-05, |
| "loss": 0.0001, |
| "step": 523 |
| }, |
| { |
| "epoch": 5.346938775510204, |
| "grad_norm": 1.6587684154510498, |
| "learning_rate": 1.835161448532839e-05, |
| "loss": 0.0169, |
| "step": 524 |
| }, |
| { |
| "epoch": 5.357142857142857, |
| "grad_norm": 0.22815948724746704, |
| "learning_rate": 1.8341805943895178e-05, |
| "loss": 0.0021, |
| "step": 525 |
| }, |
| { |
| "epoch": 5.36734693877551, |
| "grad_norm": 1.3137353658676147, |
| "learning_rate": 1.833197094412449e-05, |
| "loss": 0.0088, |
| "step": 526 |
| }, |
| { |
| "epoch": 5.377551020408164, |
| "grad_norm": 0.0314713716506958, |
| "learning_rate": 1.832210951721074e-05, |
| "loss": 0.0003, |
| "step": 527 |
| }, |
| { |
| "epoch": 5.387755102040816, |
| "grad_norm": 2.1728274822235107, |
| "learning_rate": 1.831222169443216e-05, |
| "loss": 0.0308, |
| "step": 528 |
| }, |
| { |
| "epoch": 5.3979591836734695, |
| "grad_norm": 0.0005056152585893869, |
| "learning_rate": 1.8302307507150703e-05, |
| "loss": 0.0, |
| "step": 529 |
| }, |
| { |
| "epoch": 5.408163265306122, |
| "grad_norm": 40.51821517944336, |
| "learning_rate": 1.8292366986811952e-05, |
| "loss": 0.3433, |
| "step": 530 |
| }, |
| { |
| "epoch": 5.418367346938775, |
| "grad_norm": 0.018755732104182243, |
| "learning_rate": 1.8282400164945006e-05, |
| "loss": 0.0003, |
| "step": 531 |
| }, |
| { |
| "epoch": 5.428571428571429, |
| "grad_norm": 0.2546168267726898, |
| "learning_rate": 1.8272407073162393e-05, |
| "loss": 0.0036, |
| "step": 532 |
| }, |
| { |
| "epoch": 5.438775510204081, |
| "grad_norm": 0.12199478596448898, |
| "learning_rate": 1.826238774315995e-05, |
| "loss": 0.0008, |
| "step": 533 |
| }, |
| { |
| "epoch": 5.448979591836735, |
| "grad_norm": 0.6051994562149048, |
| "learning_rate": 1.8252342206716754e-05, |
| "loss": 0.0056, |
| "step": 534 |
| }, |
| { |
| "epoch": 5.459183673469388, |
| "grad_norm": 0.16368649899959564, |
| "learning_rate": 1.8242270495694985e-05, |
| "loss": 0.0028, |
| "step": 535 |
| }, |
| { |
| "epoch": 5.469387755102041, |
| "grad_norm": 0.09096916019916534, |
| "learning_rate": 1.8232172642039856e-05, |
| "loss": 0.0009, |
| "step": 536 |
| }, |
| { |
| "epoch": 5.479591836734694, |
| "grad_norm": 0.13005401194095612, |
| "learning_rate": 1.8222048677779495e-05, |
| "loss": 0.0015, |
| "step": 537 |
| }, |
| { |
| "epoch": 5.489795918367347, |
| "grad_norm": 0.4791003465652466, |
| "learning_rate": 1.821189863502484e-05, |
| "loss": 0.0023, |
| "step": 538 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.07315538823604584, |
| "learning_rate": 1.820172254596956e-05, |
| "loss": 0.0007, |
| "step": 539 |
| }, |
| { |
| "epoch": 5.510204081632653, |
| "grad_norm": 0.006543061695992947, |
| "learning_rate": 1.819152044288992e-05, |
| "loss": 0.0001, |
| "step": 540 |
| }, |
| { |
| "epoch": 5.520408163265306, |
| "grad_norm": 1.838545799255371, |
| "learning_rate": 1.8181292358144703e-05, |
| "loss": 0.0231, |
| "step": 541 |
| }, |
| { |
| "epoch": 5.530612244897959, |
| "grad_norm": 18.193073272705078, |
| "learning_rate": 1.81710383241751e-05, |
| "loss": 0.1314, |
| "step": 542 |
| }, |
| { |
| "epoch": 5.540816326530612, |
| "grad_norm": 145.21258544921875, |
| "learning_rate": 1.816075837350461e-05, |
| "loss": 4.2928, |
| "step": 543 |
| }, |
| { |
| "epoch": 5.551020408163265, |
| "grad_norm": 2.3930890560150146, |
| "learning_rate": 1.815045253873893e-05, |
| "loss": 0.0168, |
| "step": 544 |
| }, |
| { |
| "epoch": 5.561224489795919, |
| "grad_norm": 0.044057078659534454, |
| "learning_rate": 1.814012085256585e-05, |
| "loss": 0.0002, |
| "step": 545 |
| }, |
| { |
| "epoch": 5.571428571428571, |
| "grad_norm": 0.026266353204846382, |
| "learning_rate": 1.812976334775517e-05, |
| "loss": 0.0003, |
| "step": 546 |
| }, |
| { |
| "epoch": 5.581632653061225, |
| "grad_norm": 0.6757158041000366, |
| "learning_rate": 1.811938005715857e-05, |
| "loss": 0.0051, |
| "step": 547 |
| }, |
| { |
| "epoch": 5.591836734693878, |
| "grad_norm": 0.006156954448670149, |
| "learning_rate": 1.8108971013709512e-05, |
| "loss": 0.0001, |
| "step": 548 |
| }, |
| { |
| "epoch": 5.6020408163265305, |
| "grad_norm": 0.4917071461677551, |
| "learning_rate": 1.8098536250423154e-05, |
| "loss": 0.003, |
| "step": 549 |
| }, |
| { |
| "epoch": 5.612244897959184, |
| "grad_norm": 0.3305758237838745, |
| "learning_rate": 1.8088075800396227e-05, |
| "loss": 0.0037, |
| "step": 550 |
| }, |
| { |
| "epoch": 5.622448979591836, |
| "grad_norm": 0.5282797813415527, |
| "learning_rate": 1.8077589696806925e-05, |
| "loss": 0.0047, |
| "step": 551 |
| }, |
| { |
| "epoch": 5.63265306122449, |
| "grad_norm": 0.46409568190574646, |
| "learning_rate": 1.8067077972914822e-05, |
| "loss": 0.0042, |
| "step": 552 |
| }, |
| { |
| "epoch": 5.642857142857143, |
| "grad_norm": 0.10639617592096329, |
| "learning_rate": 1.8056540662060747e-05, |
| "loss": 0.0011, |
| "step": 553 |
| }, |
| { |
| "epoch": 5.653061224489796, |
| "grad_norm": 0.05673323944211006, |
| "learning_rate": 1.8045977797666685e-05, |
| "loss": 0.0007, |
| "step": 554 |
| }, |
| { |
| "epoch": 5.663265306122449, |
| "grad_norm": 0.6235435009002686, |
| "learning_rate": 1.8035389413235672e-05, |
| "loss": 0.0036, |
| "step": 555 |
| }, |
| { |
| "epoch": 5.673469387755102, |
| "grad_norm": 5.090694427490234, |
| "learning_rate": 1.8024775542351695e-05, |
| "loss": 0.0572, |
| "step": 556 |
| }, |
| { |
| "epoch": 5.683673469387755, |
| "grad_norm": 42.43338394165039, |
| "learning_rate": 1.8014136218679566e-05, |
| "loss": 0.4782, |
| "step": 557 |
| }, |
| { |
| "epoch": 5.6938775510204085, |
| "grad_norm": 0.4403914511203766, |
| "learning_rate": 1.8003471475964837e-05, |
| "loss": 0.0033, |
| "step": 558 |
| }, |
| { |
| "epoch": 5.704081632653061, |
| "grad_norm": 3.306797742843628, |
| "learning_rate": 1.7992781348033678e-05, |
| "loss": 0.0453, |
| "step": 559 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.06393012404441833, |
| "learning_rate": 1.7982065868792772e-05, |
| "loss": 0.0006, |
| "step": 560 |
| }, |
| { |
| "epoch": 5.724489795918368, |
| "grad_norm": 0.027197500690817833, |
| "learning_rate": 1.7971325072229227e-05, |
| "loss": 0.0003, |
| "step": 561 |
| }, |
| { |
| "epoch": 5.73469387755102, |
| "grad_norm": 0.1507413387298584, |
| "learning_rate": 1.7960558992410432e-05, |
| "loss": 0.0018, |
| "step": 562 |
| }, |
| { |
| "epoch": 5.744897959183674, |
| "grad_norm": 5.787868022918701, |
| "learning_rate": 1.794976766348398e-05, |
| "loss": 0.0589, |
| "step": 563 |
| }, |
| { |
| "epoch": 5.755102040816326, |
| "grad_norm": 0.0066904472187161446, |
| "learning_rate": 1.7938951119677544e-05, |
| "loss": 0.0001, |
| "step": 564 |
| }, |
| { |
| "epoch": 5.76530612244898, |
| "grad_norm": 0.11523879319429398, |
| "learning_rate": 1.7928109395298777e-05, |
| "loss": 0.0013, |
| "step": 565 |
| }, |
| { |
| "epoch": 5.775510204081632, |
| "grad_norm": 0.004689464345574379, |
| "learning_rate": 1.79172425247352e-05, |
| "loss": 0.0001, |
| "step": 566 |
| }, |
| { |
| "epoch": 5.785714285714286, |
| "grad_norm": 0.09963806718587875, |
| "learning_rate": 1.7906350542454084e-05, |
| "loss": 0.0011, |
| "step": 567 |
| }, |
| { |
| "epoch": 5.795918367346939, |
| "grad_norm": 0.3132230341434479, |
| "learning_rate": 1.7895433483002356e-05, |
| "loss": 0.0019, |
| "step": 568 |
| }, |
| { |
| "epoch": 5.8061224489795915, |
| "grad_norm": 0.6437335014343262, |
| "learning_rate": 1.788449138100648e-05, |
| "loss": 0.0055, |
| "step": 569 |
| }, |
| { |
| "epoch": 5.816326530612245, |
| "grad_norm": 32.23442077636719, |
| "learning_rate": 1.787352427117235e-05, |
| "loss": 0.6808, |
| "step": 570 |
| }, |
| { |
| "epoch": 5.826530612244898, |
| "grad_norm": 0.12098479270935059, |
| "learning_rate": 1.7862532188285176e-05, |
| "loss": 0.0007, |
| "step": 571 |
| }, |
| { |
| "epoch": 5.836734693877551, |
| "grad_norm": 0.07133156061172485, |
| "learning_rate": 1.785151516720938e-05, |
| "loss": 0.0008, |
| "step": 572 |
| }, |
| { |
| "epoch": 5.846938775510204, |
| "grad_norm": 35.33832550048828, |
| "learning_rate": 1.7840473242888486e-05, |
| "loss": 0.7029, |
| "step": 573 |
| }, |
| { |
| "epoch": 5.857142857142857, |
| "grad_norm": 0.22042210400104523, |
| "learning_rate": 1.7829406450344998e-05, |
| "loss": 0.003, |
| "step": 574 |
| }, |
| { |
| "epoch": 5.86734693877551, |
| "grad_norm": 0.07045172154903412, |
| "learning_rate": 1.78183148246803e-05, |
| "loss": 0.0008, |
| "step": 575 |
| }, |
| { |
| "epoch": 5.877551020408164, |
| "grad_norm": 0.009592265821993351, |
| "learning_rate": 1.780719840107454e-05, |
| "loss": 0.0001, |
| "step": 576 |
| }, |
| { |
| "epoch": 5.887755102040816, |
| "grad_norm": 126.49264526367188, |
| "learning_rate": 1.779605721478652e-05, |
| "loss": 3.5868, |
| "step": 577 |
| }, |
| { |
| "epoch": 5.8979591836734695, |
| "grad_norm": 0.1849050521850586, |
| "learning_rate": 1.778489130115359e-05, |
| "loss": 0.0019, |
| "step": 578 |
| }, |
| { |
| "epoch": 5.908163265306122, |
| "grad_norm": 0.200727641582489, |
| "learning_rate": 1.777370069559152e-05, |
| "loss": 0.0023, |
| "step": 579 |
| }, |
| { |
| "epoch": 5.918367346938775, |
| "grad_norm": 4.7665019035339355, |
| "learning_rate": 1.7762485433594398e-05, |
| "loss": 0.0625, |
| "step": 580 |
| }, |
| { |
| "epoch": 5.928571428571429, |
| "grad_norm": 18.486141204833984, |
| "learning_rate": 1.775124555073452e-05, |
| "loss": 0.1886, |
| "step": 581 |
| }, |
| { |
| "epoch": 5.938775510204081, |
| "grad_norm": 2.461655855178833, |
| "learning_rate": 1.7739981082662275e-05, |
| "loss": 0.0253, |
| "step": 582 |
| }, |
| { |
| "epoch": 5.948979591836735, |
| "grad_norm": 77.3629379272461, |
| "learning_rate": 1.7728692065106032e-05, |
| "loss": 0.6732, |
| "step": 583 |
| }, |
| { |
| "epoch": 5.959183673469388, |
| "grad_norm": 0.00850403681397438, |
| "learning_rate": 1.771737853387202e-05, |
| "loss": 0.0001, |
| "step": 584 |
| }, |
| { |
| "epoch": 5.969387755102041, |
| "grad_norm": 28.388769149780273, |
| "learning_rate": 1.7706040524844222e-05, |
| "loss": 0.239, |
| "step": 585 |
| }, |
| { |
| "epoch": 5.979591836734694, |
| "grad_norm": 218.7047119140625, |
| "learning_rate": 1.769467807398426e-05, |
| "loss": 5.5812, |
| "step": 586 |
| }, |
| { |
| "epoch": 5.989795918367347, |
| "grad_norm": 0.9289079904556274, |
| "learning_rate": 1.768329121733128e-05, |
| "loss": 0.0129, |
| "step": 587 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.0184466615319252, |
| "learning_rate": 1.7671879991001838e-05, |
| "loss": 0.0002, |
| "step": 588 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_dim_128_cosine_accuracy@1": 0.3495518565941101, |
| "eval_dim_128_cosine_accuracy@10": 0.4154929577464789, |
| "eval_dim_128_cosine_accuracy@3": 0.353393085787452, |
| "eval_dim_128_cosine_accuracy@5": 0.38028169014084506, |
| "eval_dim_128_cosine_map@100": 0.4276170602366832, |
| "eval_dim_128_cosine_mrr@10": 0.3604691278987048, |
| "eval_dim_128_cosine_ndcg@10": 0.37104719123202995, |
| "eval_dim_128_cosine_precision@1": 0.3495518565941101, |
| "eval_dim_128_cosine_precision@10": 0.312291933418694, |
| "eval_dim_128_cosine_precision@3": 0.34848484848484845, |
| "eval_dim_128_cosine_precision@5": 0.339820742637644, |
| "eval_dim_128_cosine_recall@1": 0.037856544549247154, |
| "eval_dim_128_cosine_recall@10": 0.25324669198316696, |
| "eval_dim_128_cosine_recall@3": 0.11129608559954554, |
| "eval_dim_128_cosine_recall@5": 0.1684035717787531, |
| "eval_dim_256_cosine_accuracy@1": 0.38092189500640206, |
| "eval_dim_256_cosine_accuracy@10": 0.44366197183098594, |
| "eval_dim_256_cosine_accuracy@3": 0.38412291933418696, |
| "eval_dim_256_cosine_accuracy@5": 0.40973111395646605, |
| "eval_dim_256_cosine_map@100": 0.4594474328308739, |
| "eval_dim_256_cosine_mrr@10": 0.3912121720220308, |
| "eval_dim_256_cosine_ndcg@10": 0.4022720775585408, |
| "eval_dim_256_cosine_precision@1": 0.38092189500640206, |
| "eval_dim_256_cosine_precision@10": 0.3419974391805378, |
| "eval_dim_256_cosine_precision@3": 0.3800682885189927, |
| "eval_dim_256_cosine_precision@5": 0.3714468629961588, |
| "eval_dim_256_cosine_recall@1": 0.03938517779616356, |
| "eval_dim_256_cosine_recall@10": 0.2686379160273794, |
| "eval_dim_256_cosine_recall@3": 0.115945325123842, |
| "eval_dim_256_cosine_recall@5": 0.1763856331416056, |
| "eval_dim_512_cosine_accuracy@1": 0.39820742637644047, |
| "eval_dim_512_cosine_accuracy@10": 0.45902688860435337, |
| "eval_dim_512_cosine_accuracy@3": 0.4026888604353393, |
| "eval_dim_512_cosine_accuracy@5": 0.42701664532650446, |
| "eval_dim_512_cosine_map@100": 0.4748374115934728, |
| "eval_dim_512_cosine_mrr@10": 0.4087001808832792, |
| "eval_dim_512_cosine_ndcg@10": 0.41985375125260577, |
| "eval_dim_512_cosine_precision@1": 0.39820742637644047, |
| "eval_dim_512_cosine_precision@10": 0.356978233034571, |
| "eval_dim_512_cosine_precision@3": 0.39820742637644047, |
| "eval_dim_512_cosine_precision@5": 0.3892445582586428, |
| "eval_dim_512_cosine_recall@1": 0.04102662618120145, |
| "eval_dim_512_cosine_recall@10": 0.27956498455762785, |
| "eval_dim_512_cosine_recall@3": 0.12062294908153026, |
| "eval_dim_512_cosine_recall@5": 0.18402636375152, |
| "eval_dim_64_cosine_accuracy@1": 0.3002560819462228, |
| "eval_dim_64_cosine_accuracy@10": 0.3649167733674776, |
| "eval_dim_64_cosine_accuracy@3": 0.3072983354673495, |
| "eval_dim_64_cosine_accuracy@5": 0.33034571062740076, |
| "eval_dim_64_cosine_map@100": 0.37592384285873587, |
| "eval_dim_64_cosine_mrr@10": 0.31124626547161705, |
| "eval_dim_64_cosine_ndcg@10": 0.32194373763795797, |
| "eval_dim_64_cosine_precision@1": 0.3002560819462228, |
| "eval_dim_64_cosine_precision@10": 0.2714468629961588, |
| "eval_dim_64_cosine_precision@3": 0.30110968843363206, |
| "eval_dim_64_cosine_precision@5": 0.29475032010243274, |
| "eval_dim_64_cosine_recall@1": 0.03258312564919841, |
| "eval_dim_64_cosine_recall@10": 0.21983024392840253, |
| "eval_dim_64_cosine_recall@3": 0.09635373620336293, |
| "eval_dim_64_cosine_recall@5": 0.14603365016280198, |
| "eval_dim_768_cosine_accuracy@1": 0.4058898847631242, |
| "eval_dim_768_cosine_accuracy@10": 0.471190781049936, |
| "eval_dim_768_cosine_accuracy@3": 0.41037131882202305, |
| "eval_dim_768_cosine_accuracy@5": 0.4385403329065301, |
| "eval_dim_768_cosine_map@100": 0.48482154237960223, |
| "eval_dim_768_cosine_mrr@10": 0.4170203036400217, |
| "eval_dim_768_cosine_ndcg@10": 0.4292262848394862, |
| "eval_dim_768_cosine_precision@1": 0.4058898847631242, |
| "eval_dim_768_cosine_precision@10": 0.36651728553137003, |
| "eval_dim_768_cosine_precision@3": 0.4050362782757149, |
| "eval_dim_768_cosine_precision@5": 0.39705505761843796, |
| "eval_dim_768_cosine_recall@1": 0.04172967581938629, |
| "eval_dim_768_cosine_recall@10": 0.2836218270585116, |
| "eval_dim_768_cosine_recall@3": 0.12212076683897896, |
| "eval_dim_768_cosine_recall@5": 0.18584066050972378, |
| "eval_runtime": 183.4217, |
| "eval_samples_per_second": 0.0, |
| "eval_sequential_score": 0.32194373763795797, |
| "eval_steps_per_second": 0.0, |
| "step": 588 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1960, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|