IoannisKat1's picture
Add finetuned model
03e322c verified
{
"best_global_step": 490,
"best_metric": 0.38254301379946687,
"best_model_checkpoint": "nomic-ai/modernbert-embed-base/checkpoint-490",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 588,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01020408163265306,
"grad_norm": 97.22399139404297,
"learning_rate": 0.0,
"loss": 4.0461,
"step": 1
},
{
"epoch": 0.02040816326530612,
"grad_norm": 201.0540313720703,
"learning_rate": 1.0204081632653061e-07,
"loss": 7.4174,
"step": 2
},
{
"epoch": 0.030612244897959183,
"grad_norm": 114.41702270507812,
"learning_rate": 2.0408163265306121e-07,
"loss": 4.0528,
"step": 3
},
{
"epoch": 0.04081632653061224,
"grad_norm": 74.34385681152344,
"learning_rate": 3.0612244897959183e-07,
"loss": 2.6554,
"step": 4
},
{
"epoch": 0.05102040816326531,
"grad_norm": 26.128253936767578,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.5018,
"step": 5
},
{
"epoch": 0.061224489795918366,
"grad_norm": 55.006412506103516,
"learning_rate": 5.102040816326531e-07,
"loss": 0.7805,
"step": 6
},
{
"epoch": 0.07142857142857142,
"grad_norm": 119.91863250732422,
"learning_rate": 6.122448979591837e-07,
"loss": 2.9274,
"step": 7
},
{
"epoch": 0.08163265306122448,
"grad_norm": 147.19371032714844,
"learning_rate": 7.142857142857143e-07,
"loss": 4.5888,
"step": 8
},
{
"epoch": 0.09183673469387756,
"grad_norm": 173.107177734375,
"learning_rate": 8.163265306122449e-07,
"loss": 2.5851,
"step": 9
},
{
"epoch": 0.10204081632653061,
"grad_norm": 16.112817764282227,
"learning_rate": 9.183673469387756e-07,
"loss": 0.4261,
"step": 10
},
{
"epoch": 0.11224489795918367,
"grad_norm": 44.18981170654297,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.6066,
"step": 11
},
{
"epoch": 0.12244897959183673,
"grad_norm": 40.847877502441406,
"learning_rate": 1.122448979591837e-06,
"loss": 1.5421,
"step": 12
},
{
"epoch": 0.1326530612244898,
"grad_norm": 22.00406265258789,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.5044,
"step": 13
},
{
"epoch": 0.14285714285714285,
"grad_norm": 90.59610748291016,
"learning_rate": 1.3265306122448982e-06,
"loss": 1.6806,
"step": 14
},
{
"epoch": 0.15306122448979592,
"grad_norm": 82.69358825683594,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.8214,
"step": 15
},
{
"epoch": 0.16326530612244897,
"grad_norm": 83.0409927368164,
"learning_rate": 1.5306122448979593e-06,
"loss": 2.6111,
"step": 16
},
{
"epoch": 0.17346938775510204,
"grad_norm": 187.79660034179688,
"learning_rate": 1.6326530612244897e-06,
"loss": 8.3034,
"step": 17
},
{
"epoch": 0.1836734693877551,
"grad_norm": 27.019817352294922,
"learning_rate": 1.7346938775510206e-06,
"loss": 0.5837,
"step": 18
},
{
"epoch": 0.19387755102040816,
"grad_norm": 57.62622833251953,
"learning_rate": 1.8367346938775512e-06,
"loss": 2.4009,
"step": 19
},
{
"epoch": 0.20408163265306123,
"grad_norm": 30.775474548339844,
"learning_rate": 1.938775510204082e-06,
"loss": 0.8685,
"step": 20
},
{
"epoch": 0.21428571428571427,
"grad_norm": 184.99273681640625,
"learning_rate": 2.0408163265306125e-06,
"loss": 3.1922,
"step": 21
},
{
"epoch": 0.22448979591836735,
"grad_norm": 58.83700180053711,
"learning_rate": 2.1428571428571427e-06,
"loss": 4.7617,
"step": 22
},
{
"epoch": 0.23469387755102042,
"grad_norm": 61.65654373168945,
"learning_rate": 2.244897959183674e-06,
"loss": 1.962,
"step": 23
},
{
"epoch": 0.24489795918367346,
"grad_norm": 164.40609741210938,
"learning_rate": 2.3469387755102044e-06,
"loss": 7.5857,
"step": 24
},
{
"epoch": 0.25510204081632654,
"grad_norm": 5.837972640991211,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.1287,
"step": 25
},
{
"epoch": 0.2653061224489796,
"grad_norm": 82.78257751464844,
"learning_rate": 2.5510204081632657e-06,
"loss": 3.0167,
"step": 26
},
{
"epoch": 0.2755102040816326,
"grad_norm": 62.918235778808594,
"learning_rate": 2.6530612244897964e-06,
"loss": 3.8032,
"step": 27
},
{
"epoch": 0.2857142857142857,
"grad_norm": 118.64309692382812,
"learning_rate": 2.7551020408163266e-06,
"loss": 3.8445,
"step": 28
},
{
"epoch": 0.29591836734693877,
"grad_norm": 103.83515167236328,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.6414,
"step": 29
},
{
"epoch": 0.30612244897959184,
"grad_norm": 162.89938354492188,
"learning_rate": 2.959183673469388e-06,
"loss": 6.3828,
"step": 30
},
{
"epoch": 0.3163265306122449,
"grad_norm": 93.17887115478516,
"learning_rate": 3.0612244897959185e-06,
"loss": 3.1969,
"step": 31
},
{
"epoch": 0.32653061224489793,
"grad_norm": 35.798160552978516,
"learning_rate": 3.1632653061224496e-06,
"loss": 0.7605,
"step": 32
},
{
"epoch": 0.336734693877551,
"grad_norm": 91.22254943847656,
"learning_rate": 3.2653061224489794e-06,
"loss": 5.0711,
"step": 33
},
{
"epoch": 0.3469387755102041,
"grad_norm": 147.5310821533203,
"learning_rate": 3.3673469387755105e-06,
"loss": 2.6523,
"step": 34
},
{
"epoch": 0.35714285714285715,
"grad_norm": 20.746273040771484,
"learning_rate": 3.469387755102041e-06,
"loss": 0.4005,
"step": 35
},
{
"epoch": 0.3673469387755102,
"grad_norm": 43.79488754272461,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.757,
"step": 36
},
{
"epoch": 0.37755102040816324,
"grad_norm": 133.88282775878906,
"learning_rate": 3.6734693877551024e-06,
"loss": 3.1397,
"step": 37
},
{
"epoch": 0.3877551020408163,
"grad_norm": 203.5764617919922,
"learning_rate": 3.7755102040816327e-06,
"loss": 3.6261,
"step": 38
},
{
"epoch": 0.3979591836734694,
"grad_norm": 80.596923828125,
"learning_rate": 3.877551020408164e-06,
"loss": 2.7427,
"step": 39
},
{
"epoch": 0.40816326530612246,
"grad_norm": 15.089832305908203,
"learning_rate": 3.979591836734694e-06,
"loss": 0.4561,
"step": 40
},
{
"epoch": 0.41836734693877553,
"grad_norm": 1.917051076889038,
"learning_rate": 4.081632653061225e-06,
"loss": 0.0331,
"step": 41
},
{
"epoch": 0.42857142857142855,
"grad_norm": 126.42117309570312,
"learning_rate": 4.183673469387755e-06,
"loss": 5.1981,
"step": 42
},
{
"epoch": 0.4387755102040816,
"grad_norm": 19.87098503112793,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.5115,
"step": 43
},
{
"epoch": 0.4489795918367347,
"grad_norm": 38.2932014465332,
"learning_rate": 4.3877551020408165e-06,
"loss": 1.119,
"step": 44
},
{
"epoch": 0.45918367346938777,
"grad_norm": 105.88655090332031,
"learning_rate": 4.489795918367348e-06,
"loss": 1.8869,
"step": 45
},
{
"epoch": 0.46938775510204084,
"grad_norm": 77.14546203613281,
"learning_rate": 4.591836734693878e-06,
"loss": 2.7846,
"step": 46
},
{
"epoch": 0.47959183673469385,
"grad_norm": 85.57715606689453,
"learning_rate": 4.693877551020409e-06,
"loss": 2.4171,
"step": 47
},
{
"epoch": 0.4897959183673469,
"grad_norm": 58.05616760253906,
"learning_rate": 4.795918367346939e-06,
"loss": 2.6935,
"step": 48
},
{
"epoch": 0.5,
"grad_norm": 44.124664306640625,
"learning_rate": 4.897959183673469e-06,
"loss": 1.0925,
"step": 49
},
{
"epoch": 0.5102040816326531,
"grad_norm": 73.12091827392578,
"learning_rate": 5e-06,
"loss": 2.0241,
"step": 50
},
{
"epoch": 0.5204081632653061,
"grad_norm": 185.66015625,
"learning_rate": 5.1020408163265315e-06,
"loss": 7.4609,
"step": 51
},
{
"epoch": 0.5306122448979592,
"grad_norm": 93.33148193359375,
"learning_rate": 5.204081632653062e-06,
"loss": 3.2983,
"step": 52
},
{
"epoch": 0.5408163265306123,
"grad_norm": 77.21890258789062,
"learning_rate": 5.306122448979593e-06,
"loss": 3.8886,
"step": 53
},
{
"epoch": 0.5510204081632653,
"grad_norm": 39.11575698852539,
"learning_rate": 5.408163265306123e-06,
"loss": 0.5936,
"step": 54
},
{
"epoch": 0.5612244897959183,
"grad_norm": 48.75920867919922,
"learning_rate": 5.510204081632653e-06,
"loss": 0.8204,
"step": 55
},
{
"epoch": 0.5714285714285714,
"grad_norm": 9.229560852050781,
"learning_rate": 5.6122448979591834e-06,
"loss": 0.1836,
"step": 56
},
{
"epoch": 0.5816326530612245,
"grad_norm": 52.3462028503418,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.4946,
"step": 57
},
{
"epoch": 0.5918367346938775,
"grad_norm": 30.56440544128418,
"learning_rate": 5.816326530612246e-06,
"loss": 0.2755,
"step": 58
},
{
"epoch": 0.6020408163265306,
"grad_norm": 13.171940803527832,
"learning_rate": 5.918367346938776e-06,
"loss": 0.1641,
"step": 59
},
{
"epoch": 0.6122448979591837,
"grad_norm": 55.642364501953125,
"learning_rate": 6.020408163265307e-06,
"loss": 1.2537,
"step": 60
},
{
"epoch": 0.6224489795918368,
"grad_norm": 67.12672424316406,
"learning_rate": 6.122448979591837e-06,
"loss": 4.3895,
"step": 61
},
{
"epoch": 0.6326530612244898,
"grad_norm": 133.45712280273438,
"learning_rate": 6.224489795918368e-06,
"loss": 3.2041,
"step": 62
},
{
"epoch": 0.6428571428571429,
"grad_norm": 115.94568634033203,
"learning_rate": 6.326530612244899e-06,
"loss": 3.2087,
"step": 63
},
{
"epoch": 0.6530612244897959,
"grad_norm": 196.57113647460938,
"learning_rate": 6.4285714285714295e-06,
"loss": 8.0364,
"step": 64
},
{
"epoch": 0.6632653061224489,
"grad_norm": 43.574398040771484,
"learning_rate": 6.530612244897959e-06,
"loss": 0.7748,
"step": 65
},
{
"epoch": 0.673469387755102,
"grad_norm": 133.8882293701172,
"learning_rate": 6.63265306122449e-06,
"loss": 4.7505,
"step": 66
},
{
"epoch": 0.6836734693877551,
"grad_norm": 35.49979019165039,
"learning_rate": 6.734693877551021e-06,
"loss": 2.2919,
"step": 67
},
{
"epoch": 0.6938775510204082,
"grad_norm": 56.431461334228516,
"learning_rate": 6.836734693877551e-06,
"loss": 0.6432,
"step": 68
},
{
"epoch": 0.7040816326530612,
"grad_norm": 44.0866813659668,
"learning_rate": 6.938775510204082e-06,
"loss": 0.97,
"step": 69
},
{
"epoch": 0.7142857142857143,
"grad_norm": 156.4910125732422,
"learning_rate": 7.0408163265306125e-06,
"loss": 4.787,
"step": 70
},
{
"epoch": 0.7244897959183674,
"grad_norm": 339.540771484375,
"learning_rate": 7.1428571428571436e-06,
"loss": 2.6329,
"step": 71
},
{
"epoch": 0.7346938775510204,
"grad_norm": 80.52706146240234,
"learning_rate": 7.244897959183675e-06,
"loss": 1.2897,
"step": 72
},
{
"epoch": 0.7448979591836735,
"grad_norm": 115.99695587158203,
"learning_rate": 7.346938775510205e-06,
"loss": 2.2093,
"step": 73
},
{
"epoch": 0.7551020408163265,
"grad_norm": 59.96979904174805,
"learning_rate": 7.448979591836736e-06,
"loss": 1.7263,
"step": 74
},
{
"epoch": 0.7653061224489796,
"grad_norm": 40.83548355102539,
"learning_rate": 7.551020408163265e-06,
"loss": 0.9284,
"step": 75
},
{
"epoch": 0.7755102040816326,
"grad_norm": 14.140007019042969,
"learning_rate": 7.653061224489796e-06,
"loss": 0.2508,
"step": 76
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.5128963589668274,
"learning_rate": 7.755102040816327e-06,
"loss": 0.0072,
"step": 77
},
{
"epoch": 0.7959183673469388,
"grad_norm": 10.41791820526123,
"learning_rate": 7.857142857142858e-06,
"loss": 0.1753,
"step": 78
},
{
"epoch": 0.8061224489795918,
"grad_norm": 30.593122482299805,
"learning_rate": 7.959183673469388e-06,
"loss": 1.2562,
"step": 79
},
{
"epoch": 0.8163265306122449,
"grad_norm": 7.927923679351807,
"learning_rate": 8.06122448979592e-06,
"loss": 0.1105,
"step": 80
},
{
"epoch": 0.826530612244898,
"grad_norm": 156.017333984375,
"learning_rate": 8.16326530612245e-06,
"loss": 4.0241,
"step": 81
},
{
"epoch": 0.8367346938775511,
"grad_norm": 116.1068344116211,
"learning_rate": 8.26530612244898e-06,
"loss": 1.655,
"step": 82
},
{
"epoch": 0.8469387755102041,
"grad_norm": 3.5885918140411377,
"learning_rate": 8.36734693877551e-06,
"loss": 0.0406,
"step": 83
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.3218710422515869,
"learning_rate": 8.469387755102042e-06,
"loss": 0.0033,
"step": 84
},
{
"epoch": 0.8673469387755102,
"grad_norm": 156.8653564453125,
"learning_rate": 8.571428571428571e-06,
"loss": 3.2183,
"step": 85
},
{
"epoch": 0.8775510204081632,
"grad_norm": 19.75959587097168,
"learning_rate": 8.673469387755103e-06,
"loss": 0.1812,
"step": 86
},
{
"epoch": 0.8877551020408163,
"grad_norm": 9.409197807312012,
"learning_rate": 8.775510204081633e-06,
"loss": 0.2222,
"step": 87
},
{
"epoch": 0.8979591836734694,
"grad_norm": 44.61834716796875,
"learning_rate": 8.877551020408163e-06,
"loss": 0.6726,
"step": 88
},
{
"epoch": 0.9081632653061225,
"grad_norm": 98.52664947509766,
"learning_rate": 8.979591836734695e-06,
"loss": 3.5891,
"step": 89
},
{
"epoch": 0.9183673469387755,
"grad_norm": 27.778385162353516,
"learning_rate": 9.081632653061225e-06,
"loss": 0.3833,
"step": 90
},
{
"epoch": 0.9285714285714286,
"grad_norm": 1.6346553564071655,
"learning_rate": 9.183673469387756e-06,
"loss": 0.0257,
"step": 91
},
{
"epoch": 0.9387755102040817,
"grad_norm": 176.21873474121094,
"learning_rate": 9.285714285714288e-06,
"loss": 4.635,
"step": 92
},
{
"epoch": 0.9489795918367347,
"grad_norm": 93.562255859375,
"learning_rate": 9.387755102040818e-06,
"loss": 2.1625,
"step": 93
},
{
"epoch": 0.9591836734693877,
"grad_norm": 20.919546127319336,
"learning_rate": 9.489795918367348e-06,
"loss": 0.3742,
"step": 94
},
{
"epoch": 0.9693877551020408,
"grad_norm": 8.895219802856445,
"learning_rate": 9.591836734693878e-06,
"loss": 0.1946,
"step": 95
},
{
"epoch": 0.9795918367346939,
"grad_norm": 17.501354217529297,
"learning_rate": 9.693877551020408e-06,
"loss": 0.2705,
"step": 96
},
{
"epoch": 0.9897959183673469,
"grad_norm": 226.07272338867188,
"learning_rate": 9.795918367346939e-06,
"loss": 12.4745,
"step": 97
},
{
"epoch": 1.0,
"grad_norm": 81.55729675292969,
"learning_rate": 9.89795918367347e-06,
"loss": 1.718,
"step": 98
},
{
"epoch": 1.0,
"eval_dim_128_cosine_accuracy@1": 0.32714468629961585,
"eval_dim_128_cosine_accuracy@10": 0.4014084507042254,
"eval_dim_128_cosine_accuracy@3": 0.33098591549295775,
"eval_dim_128_cosine_accuracy@5": 0.36619718309859156,
"eval_dim_128_cosine_map@100": 0.41817891915077654,
"eval_dim_128_cosine_mrr@10": 0.33965281588520974,
"eval_dim_128_cosine_ndcg@10": 0.35251068430131405,
"eval_dim_128_cosine_precision@1": 0.32714468629961585,
"eval_dim_128_cosine_precision@10": 0.293213828425096,
"eval_dim_128_cosine_precision@3": 0.3260776781903542,
"eval_dim_128_cosine_precision@5": 0.3192061459667094,
"eval_dim_128_cosine_recall@1": 0.038265678793809856,
"eval_dim_128_cosine_recall@10": 0.2562091364663566,
"eval_dim_128_cosine_recall@3": 0.11220810535939996,
"eval_dim_128_cosine_recall@5": 0.17048689772564513,
"eval_dim_256_cosine_accuracy@1": 0.3700384122919334,
"eval_dim_256_cosine_accuracy@10": 0.44622279129321385,
"eval_dim_256_cosine_accuracy@3": 0.3758002560819462,
"eval_dim_256_cosine_accuracy@5": 0.4058898847631242,
"eval_dim_256_cosine_map@100": 0.45936107144286625,
"eval_dim_256_cosine_mrr@10": 0.3829908745401701,
"eval_dim_256_cosine_ndcg@10": 0.3963587963422467,
"eval_dim_256_cosine_precision@1": 0.3700384122919334,
"eval_dim_256_cosine_precision@10": 0.33565941101152363,
"eval_dim_256_cosine_precision@3": 0.36982501067008117,
"eval_dim_256_cosine_precision@5": 0.36274007682458387,
"eval_dim_256_cosine_recall@1": 0.04011382918503575,
"eval_dim_256_cosine_recall@10": 0.2707413822992358,
"eval_dim_256_cosine_recall@3": 0.11768720228638535,
"eval_dim_256_cosine_recall@5": 0.17899109052239595,
"eval_dim_512_cosine_accuracy@1": 0.39308578745198464,
"eval_dim_512_cosine_accuracy@10": 0.4564660691421255,
"eval_dim_512_cosine_accuracy@3": 0.3975672215108835,
"eval_dim_512_cosine_accuracy@5": 0.42509603072983354,
"eval_dim_512_cosine_map@100": 0.47901702829368287,
"eval_dim_512_cosine_mrr@10": 0.4040218584232665,
"eval_dim_512_cosine_ndcg@10": 0.4158045483689078,
"eval_dim_512_cosine_precision@1": 0.39308578745198464,
"eval_dim_512_cosine_precision@10": 0.3489756722151088,
"eval_dim_512_cosine_precision@3": 0.3922321809645753,
"eval_dim_512_cosine_precision@5": 0.382842509603073,
"eval_dim_512_cosine_recall@1": 0.04316243704465245,
"eval_dim_512_cosine_recall@10": 0.2842422386776242,
"eval_dim_512_cosine_recall@3": 0.126195512947793,
"eval_dim_512_cosine_recall@5": 0.1909624031231601,
"eval_dim_64_cosine_accuracy@1": 0.2855313700384123,
"eval_dim_64_cosine_accuracy@10": 0.3559539052496799,
"eval_dim_64_cosine_accuracy@3": 0.29257362355953903,
"eval_dim_64_cosine_accuracy@5": 0.323303457106274,
"eval_dim_64_cosine_map@100": 0.37060921311190964,
"eval_dim_64_cosine_mrr@10": 0.2980323862366112,
"eval_dim_64_cosine_ndcg@10": 0.31079967856179397,
"eval_dim_64_cosine_precision@1": 0.2855313700384123,
"eval_dim_64_cosine_precision@10": 0.258258642765685,
"eval_dim_64_cosine_precision@3": 0.2861715749039693,
"eval_dim_64_cosine_precision@5": 0.28079385403329066,
"eval_dim_64_cosine_recall@1": 0.03346436650150152,
"eval_dim_64_cosine_recall@10": 0.22602132535917333,
"eval_dim_64_cosine_recall@3": 0.09850444385264467,
"eval_dim_64_cosine_recall@5": 0.14977503796339806,
"eval_dim_768_cosine_accuracy@1": 0.3975672215108835,
"eval_dim_768_cosine_accuracy@10": 0.47439180537772085,
"eval_dim_768_cosine_accuracy@3": 0.4026888604353393,
"eval_dim_768_cosine_accuracy@5": 0.4359795134443022,
"eval_dim_768_cosine_map@100": 0.48939446674087983,
"eval_dim_768_cosine_mrr@10": 0.41065941101152337,
"eval_dim_768_cosine_ndcg@10": 0.42417702900730875,
"eval_dim_768_cosine_precision@1": 0.3975672215108835,
"eval_dim_768_cosine_precision@10": 0.35678617157490394,
"eval_dim_768_cosine_precision@3": 0.3969270166453265,
"eval_dim_768_cosine_precision@5": 0.3884763124199744,
"eval_dim_768_cosine_recall@1": 0.044071996649064187,
"eval_dim_768_cosine_recall@10": 0.29232434166264154,
"eval_dim_768_cosine_recall@3": 0.12908414526605214,
"eval_dim_768_cosine_recall@5": 0.19569710007543972,
"eval_runtime": 186.9674,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.31079967856179397,
"eval_steps_per_second": 0.0,
"step": 98
},
{
"epoch": 1.010204081632653,
"grad_norm": 145.2915496826172,
"learning_rate": 1e-05,
"loss": 5.4827,
"step": 99
},
{
"epoch": 1.0204081632653061,
"grad_norm": 188.6737060546875,
"learning_rate": 1.0102040816326531e-05,
"loss": 7.4285,
"step": 100
},
{
"epoch": 1.030612244897959,
"grad_norm": 90.18331146240234,
"learning_rate": 1.0204081632653063e-05,
"loss": 2.6083,
"step": 101
},
{
"epoch": 1.0408163265306123,
"grad_norm": 48.41734313964844,
"learning_rate": 1.0306122448979591e-05,
"loss": 0.2821,
"step": 102
},
{
"epoch": 1.0510204081632653,
"grad_norm": 11.53239631652832,
"learning_rate": 1.0408163265306123e-05,
"loss": 0.2032,
"step": 103
},
{
"epoch": 1.0612244897959184,
"grad_norm": 15.895277976989746,
"learning_rate": 1.0510204081632654e-05,
"loss": 0.2603,
"step": 104
},
{
"epoch": 1.0714285714285714,
"grad_norm": 8.3611421585083,
"learning_rate": 1.0612244897959186e-05,
"loss": 0.0869,
"step": 105
},
{
"epoch": 1.0816326530612246,
"grad_norm": 3.729483127593994,
"learning_rate": 1.0714285714285714e-05,
"loss": 0.0194,
"step": 106
},
{
"epoch": 1.0918367346938775,
"grad_norm": 0.609831690788269,
"learning_rate": 1.0816326530612246e-05,
"loss": 0.0118,
"step": 107
},
{
"epoch": 1.1020408163265305,
"grad_norm": 102.30455780029297,
"learning_rate": 1.0918367346938776e-05,
"loss": 3.5743,
"step": 108
},
{
"epoch": 1.1122448979591837,
"grad_norm": 59.90740966796875,
"learning_rate": 1.1020408163265306e-05,
"loss": 0.5869,
"step": 109
},
{
"epoch": 1.1224489795918366,
"grad_norm": 1.5409817695617676,
"learning_rate": 1.1122448979591838e-05,
"loss": 0.0305,
"step": 110
},
{
"epoch": 1.1326530612244898,
"grad_norm": 24.44378662109375,
"learning_rate": 1.1224489795918367e-05,
"loss": 0.4096,
"step": 111
},
{
"epoch": 1.1428571428571428,
"grad_norm": 77.41841888427734,
"learning_rate": 1.1326530612244899e-05,
"loss": 2.2927,
"step": 112
},
{
"epoch": 1.153061224489796,
"grad_norm": 56.17146682739258,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.5007,
"step": 113
},
{
"epoch": 1.163265306122449,
"grad_norm": 59.88473129272461,
"learning_rate": 1.1530612244897961e-05,
"loss": 1.2148,
"step": 114
},
{
"epoch": 1.1734693877551021,
"grad_norm": 0.20367565751075745,
"learning_rate": 1.1632653061224491e-05,
"loss": 0.0026,
"step": 115
},
{
"epoch": 1.183673469387755,
"grad_norm": 38.7552490234375,
"learning_rate": 1.1734693877551021e-05,
"loss": 0.4087,
"step": 116
},
{
"epoch": 1.193877551020408,
"grad_norm": 3.531527280807495,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.0577,
"step": 117
},
{
"epoch": 1.2040816326530612,
"grad_norm": 213.36203002929688,
"learning_rate": 1.1938775510204084e-05,
"loss": 5.2828,
"step": 118
},
{
"epoch": 1.2142857142857142,
"grad_norm": 44.07740020751953,
"learning_rate": 1.2040816326530614e-05,
"loss": 0.5063,
"step": 119
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.3714772462844849,
"learning_rate": 1.2142857142857142e-05,
"loss": 0.0159,
"step": 120
},
{
"epoch": 1.2346938775510203,
"grad_norm": 0.10171513259410858,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.0006,
"step": 121
},
{
"epoch": 1.2448979591836735,
"grad_norm": 4.689838886260986,
"learning_rate": 1.2346938775510204e-05,
"loss": 0.0429,
"step": 122
},
{
"epoch": 1.2551020408163265,
"grad_norm": 97.93582153320312,
"learning_rate": 1.2448979591836736e-05,
"loss": 1.1297,
"step": 123
},
{
"epoch": 1.2653061224489797,
"grad_norm": 60.36366271972656,
"learning_rate": 1.2551020408163267e-05,
"loss": 0.9201,
"step": 124
},
{
"epoch": 1.2755102040816326,
"grad_norm": 4.615835189819336,
"learning_rate": 1.2653061224489798e-05,
"loss": 0.0284,
"step": 125
},
{
"epoch": 1.2857142857142856,
"grad_norm": 96.12593841552734,
"learning_rate": 1.2755102040816327e-05,
"loss": 1.9473,
"step": 126
},
{
"epoch": 1.2959183673469388,
"grad_norm": 4.051046371459961,
"learning_rate": 1.2857142857142859e-05,
"loss": 0.022,
"step": 127
},
{
"epoch": 1.306122448979592,
"grad_norm": 0.5018266439437866,
"learning_rate": 1.2959183673469389e-05,
"loss": 0.0054,
"step": 128
},
{
"epoch": 1.316326530612245,
"grad_norm": 6.450921058654785,
"learning_rate": 1.3061224489795918e-05,
"loss": 0.1004,
"step": 129
},
{
"epoch": 1.3265306122448979,
"grad_norm": 2.8662984371185303,
"learning_rate": 1.316326530612245e-05,
"loss": 0.0276,
"step": 130
},
{
"epoch": 1.336734693877551,
"grad_norm": 127.14810180664062,
"learning_rate": 1.326530612244898e-05,
"loss": 2.3906,
"step": 131
},
{
"epoch": 1.346938775510204,
"grad_norm": 9.02206802368164,
"learning_rate": 1.3367346938775512e-05,
"loss": 0.0375,
"step": 132
},
{
"epoch": 1.3571428571428572,
"grad_norm": 150.13641357421875,
"learning_rate": 1.3469387755102042e-05,
"loss": 4.9546,
"step": 133
},
{
"epoch": 1.3673469387755102,
"grad_norm": 12.425887107849121,
"learning_rate": 1.3571428571428574e-05,
"loss": 0.1619,
"step": 134
},
{
"epoch": 1.3775510204081631,
"grad_norm": 0.5521597862243652,
"learning_rate": 1.3673469387755102e-05,
"loss": 0.0087,
"step": 135
},
{
"epoch": 1.3877551020408163,
"grad_norm": 22.825164794921875,
"learning_rate": 1.3775510204081634e-05,
"loss": 0.3457,
"step": 136
},
{
"epoch": 1.3979591836734695,
"grad_norm": 5.346078872680664,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.0816,
"step": 137
},
{
"epoch": 1.4081632653061225,
"grad_norm": 59.271095275878906,
"learning_rate": 1.3979591836734696e-05,
"loss": 1.1452,
"step": 138
},
{
"epoch": 1.4183673469387754,
"grad_norm": 62.906166076660156,
"learning_rate": 1.4081632653061225e-05,
"loss": 0.5385,
"step": 139
},
{
"epoch": 1.4285714285714286,
"grad_norm": 9.781693458557129,
"learning_rate": 1.4183673469387755e-05,
"loss": 0.1222,
"step": 140
},
{
"epoch": 1.4387755102040816,
"grad_norm": 21.102067947387695,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.3915,
"step": 141
},
{
"epoch": 1.4489795918367347,
"grad_norm": 89.99552917480469,
"learning_rate": 1.4387755102040817e-05,
"loss": 3.0359,
"step": 142
},
{
"epoch": 1.4591836734693877,
"grad_norm": 18.76301383972168,
"learning_rate": 1.448979591836735e-05,
"loss": 0.2768,
"step": 143
},
{
"epoch": 1.469387755102041,
"grad_norm": 50.16675567626953,
"learning_rate": 1.4591836734693878e-05,
"loss": 0.6184,
"step": 144
},
{
"epoch": 1.4795918367346939,
"grad_norm": 133.0209503173828,
"learning_rate": 1.469387755102041e-05,
"loss": 2.7128,
"step": 145
},
{
"epoch": 1.489795918367347,
"grad_norm": 29.245777130126953,
"learning_rate": 1.479591836734694e-05,
"loss": 0.2769,
"step": 146
},
{
"epoch": 1.5,
"grad_norm": 0.3684585988521576,
"learning_rate": 1.4897959183673472e-05,
"loss": 0.0037,
"step": 147
},
{
"epoch": 1.510204081632653,
"grad_norm": 109.48973083496094,
"learning_rate": 1.5000000000000002e-05,
"loss": 1.0417,
"step": 148
},
{
"epoch": 1.5204081632653061,
"grad_norm": 64.82876586914062,
"learning_rate": 1.510204081632653e-05,
"loss": 1.4451,
"step": 149
},
{
"epoch": 1.5306122448979593,
"grad_norm": 250.61949157714844,
"learning_rate": 1.5204081632653063e-05,
"loss": 6.425,
"step": 150
},
{
"epoch": 1.5408163265306123,
"grad_norm": 34.68476867675781,
"learning_rate": 1.530612244897959e-05,
"loss": 0.3295,
"step": 151
},
{
"epoch": 1.5510204081632653,
"grad_norm": 1.7814018726348877,
"learning_rate": 1.5408163265306123e-05,
"loss": 0.0203,
"step": 152
},
{
"epoch": 1.5612244897959182,
"grad_norm": 1.4509575366973877,
"learning_rate": 1.5510204081632655e-05,
"loss": 0.0204,
"step": 153
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.14146992564201355,
"learning_rate": 1.5612244897959187e-05,
"loss": 0.0023,
"step": 154
},
{
"epoch": 1.5816326530612246,
"grad_norm": 15.823746681213379,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.1413,
"step": 155
},
{
"epoch": 1.5918367346938775,
"grad_norm": 63.16089630126953,
"learning_rate": 1.5816326530612247e-05,
"loss": 1.0637,
"step": 156
},
{
"epoch": 1.6020408163265305,
"grad_norm": 12.253658294677734,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.1995,
"step": 157
},
{
"epoch": 1.6122448979591837,
"grad_norm": 13.50799560546875,
"learning_rate": 1.6020408163265308e-05,
"loss": 0.0941,
"step": 158
},
{
"epoch": 1.6224489795918369,
"grad_norm": 98.67269134521484,
"learning_rate": 1.612244897959184e-05,
"loss": 3.9788,
"step": 159
},
{
"epoch": 1.6326530612244898,
"grad_norm": 41.64649963378906,
"learning_rate": 1.6224489795918368e-05,
"loss": 0.5844,
"step": 160
},
{
"epoch": 1.6428571428571428,
"grad_norm": 141.99420166015625,
"learning_rate": 1.63265306122449e-05,
"loss": 3.5071,
"step": 161
},
{
"epoch": 1.6530612244897958,
"grad_norm": 238.89817810058594,
"learning_rate": 1.642857142857143e-05,
"loss": 7.8894,
"step": 162
},
{
"epoch": 1.663265306122449,
"grad_norm": 81.87030029296875,
"learning_rate": 1.653061224489796e-05,
"loss": 3.4079,
"step": 163
},
{
"epoch": 1.6734693877551021,
"grad_norm": 178.0208740234375,
"learning_rate": 1.6632653061224492e-05,
"loss": 7.5755,
"step": 164
},
{
"epoch": 1.683673469387755,
"grad_norm": 41.76604461669922,
"learning_rate": 1.673469387755102e-05,
"loss": 0.7972,
"step": 165
},
{
"epoch": 1.693877551020408,
"grad_norm": 0.7283464670181274,
"learning_rate": 1.6836734693877553e-05,
"loss": 0.0106,
"step": 166
},
{
"epoch": 1.7040816326530612,
"grad_norm": 66.69834899902344,
"learning_rate": 1.6938775510204085e-05,
"loss": 0.5323,
"step": 167
},
{
"epoch": 1.7142857142857144,
"grad_norm": 1.6421160697937012,
"learning_rate": 1.7040816326530613e-05,
"loss": 0.0157,
"step": 168
},
{
"epoch": 1.7244897959183674,
"grad_norm": 87.25151824951172,
"learning_rate": 1.7142857142857142e-05,
"loss": 1.2181,
"step": 169
},
{
"epoch": 1.7346938775510203,
"grad_norm": 1.3664076328277588,
"learning_rate": 1.7244897959183674e-05,
"loss": 0.0096,
"step": 170
},
{
"epoch": 1.7448979591836735,
"grad_norm": 2.5815272331237793,
"learning_rate": 1.7346938775510206e-05,
"loss": 0.0152,
"step": 171
},
{
"epoch": 1.7551020408163265,
"grad_norm": 6.008739471435547,
"learning_rate": 1.7448979591836738e-05,
"loss": 0.068,
"step": 172
},
{
"epoch": 1.7653061224489797,
"grad_norm": 0.22308386862277985,
"learning_rate": 1.7551020408163266e-05,
"loss": 0.0014,
"step": 173
},
{
"epoch": 1.7755102040816326,
"grad_norm": 0.37313106656074524,
"learning_rate": 1.7653061224489798e-05,
"loss": 0.0034,
"step": 174
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.0778372585773468,
"learning_rate": 1.7755102040816327e-05,
"loss": 0.0006,
"step": 175
},
{
"epoch": 1.7959183673469388,
"grad_norm": 32.566566467285156,
"learning_rate": 1.785714285714286e-05,
"loss": 0.4503,
"step": 176
},
{
"epoch": 1.806122448979592,
"grad_norm": 117.00394439697266,
"learning_rate": 1.795918367346939e-05,
"loss": 4.1669,
"step": 177
},
{
"epoch": 1.816326530612245,
"grad_norm": 42.20817565917969,
"learning_rate": 1.806122448979592e-05,
"loss": 0.6081,
"step": 178
},
{
"epoch": 1.8265306122448979,
"grad_norm": 144.17698669433594,
"learning_rate": 1.816326530612245e-05,
"loss": 2.4056,
"step": 179
},
{
"epoch": 1.836734693877551,
"grad_norm": 45.562618255615234,
"learning_rate": 1.826530612244898e-05,
"loss": 0.5261,
"step": 180
},
{
"epoch": 1.8469387755102042,
"grad_norm": 48.302433013916016,
"learning_rate": 1.836734693877551e-05,
"loss": 0.2616,
"step": 181
},
{
"epoch": 1.8571428571428572,
"grad_norm": 14.456125259399414,
"learning_rate": 1.8469387755102043e-05,
"loss": 0.2859,
"step": 182
},
{
"epoch": 1.8673469387755102,
"grad_norm": 137.47515869140625,
"learning_rate": 1.8571428571428575e-05,
"loss": 6.4765,
"step": 183
},
{
"epoch": 1.8775510204081631,
"grad_norm": 1.0571497678756714,
"learning_rate": 1.8673469387755104e-05,
"loss": 0.0109,
"step": 184
},
{
"epoch": 1.8877551020408163,
"grad_norm": 0.37840384244918823,
"learning_rate": 1.8775510204081636e-05,
"loss": 0.0034,
"step": 185
},
{
"epoch": 1.8979591836734695,
"grad_norm": 16.713756561279297,
"learning_rate": 1.8877551020408164e-05,
"loss": 0.1816,
"step": 186
},
{
"epoch": 1.9081632653061225,
"grad_norm": 3.689077138900757,
"learning_rate": 1.8979591836734696e-05,
"loss": 0.039,
"step": 187
},
{
"epoch": 1.9183673469387754,
"grad_norm": 1.6388179063796997,
"learning_rate": 1.9081632653061225e-05,
"loss": 0.0239,
"step": 188
},
{
"epoch": 1.9285714285714286,
"grad_norm": 118.65974426269531,
"learning_rate": 1.9183673469387756e-05,
"loss": 2.548,
"step": 189
},
{
"epoch": 1.9387755102040818,
"grad_norm": 75.87044525146484,
"learning_rate": 1.928571428571429e-05,
"loss": 1.4144,
"step": 190
},
{
"epoch": 1.9489795918367347,
"grad_norm": 0.3555060923099518,
"learning_rate": 1.9387755102040817e-05,
"loss": 0.0047,
"step": 191
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.8721001744270325,
"learning_rate": 1.948979591836735e-05,
"loss": 0.0127,
"step": 192
},
{
"epoch": 1.9693877551020407,
"grad_norm": 45.04741668701172,
"learning_rate": 1.9591836734693877e-05,
"loss": 2.928,
"step": 193
},
{
"epoch": 1.9795918367346939,
"grad_norm": 0.20335637032985687,
"learning_rate": 1.969387755102041e-05,
"loss": 0.0012,
"step": 194
},
{
"epoch": 1.989795918367347,
"grad_norm": 11.921625137329102,
"learning_rate": 1.979591836734694e-05,
"loss": 0.1156,
"step": 195
},
{
"epoch": 2.0,
"grad_norm": 0.006426109466701746,
"learning_rate": 1.9897959183673473e-05,
"loss": 0.0001,
"step": 196
},
{
"epoch": 2.0,
"eval_dim_128_cosine_accuracy@1": 0.32842509603072984,
"eval_dim_128_cosine_accuracy@10": 0.4007682458386684,
"eval_dim_128_cosine_accuracy@3": 0.33354673495518566,
"eval_dim_128_cosine_accuracy@5": 0.35979513444302175,
"eval_dim_128_cosine_map@100": 0.4185928335134121,
"eval_dim_128_cosine_mrr@10": 0.34048990915188065,
"eval_dim_128_cosine_ndcg@10": 0.3532158341818938,
"eval_dim_128_cosine_precision@1": 0.32842509603072984,
"eval_dim_128_cosine_precision@10": 0.29206145966709346,
"eval_dim_128_cosine_precision@3": 0.3282116944088775,
"eval_dim_128_cosine_precision@5": 0.3201024327784891,
"eval_dim_128_cosine_recall@1": 0.03826623805026492,
"eval_dim_128_cosine_recall@10": 0.2557427522394185,
"eval_dim_128_cosine_recall@3": 0.11237350584970618,
"eval_dim_128_cosine_recall@5": 0.1705675982839957,
"eval_dim_256_cosine_accuracy@1": 0.3706786171574904,
"eval_dim_256_cosine_accuracy@10": 0.44430217669654287,
"eval_dim_256_cosine_accuracy@3": 0.37708066581306016,
"eval_dim_256_cosine_accuracy@5": 0.4026888604353393,
"eval_dim_256_cosine_map@100": 0.4581292876731238,
"eval_dim_256_cosine_mrr@10": 0.3829479401662495,
"eval_dim_256_cosine_ndcg@10": 0.3954192054513208,
"eval_dim_256_cosine_precision@1": 0.3706786171574904,
"eval_dim_256_cosine_precision@10": 0.33476312419974397,
"eval_dim_256_cosine_precision@3": 0.3704652155356381,
"eval_dim_256_cosine_precision@5": 0.36235595390524966,
"eval_dim_256_cosine_recall@1": 0.04010394552921631,
"eval_dim_256_cosine_recall@10": 0.2702458225766465,
"eval_dim_256_cosine_recall@3": 0.11731644582817367,
"eval_dim_256_cosine_recall@5": 0.17765428439223818,
"eval_dim_512_cosine_accuracy@1": 0.3886043533930858,
"eval_dim_512_cosine_accuracy@10": 0.4622279129321383,
"eval_dim_512_cosine_accuracy@3": 0.39500640204865556,
"eval_dim_512_cosine_accuracy@5": 0.4225352112676056,
"eval_dim_512_cosine_map@100": 0.47243004222010543,
"eval_dim_512_cosine_mrr@10": 0.40112569355527056,
"eval_dim_512_cosine_ndcg@10": 0.41406124757842494,
"eval_dim_512_cosine_precision@1": 0.3886043533930858,
"eval_dim_512_cosine_precision@10": 0.34935979513444304,
"eval_dim_512_cosine_precision@3": 0.38839095177123345,
"eval_dim_512_cosine_precision@5": 0.3797695262483995,
"eval_dim_512_cosine_recall@1": 0.04209995734458549,
"eval_dim_512_cosine_recall@10": 0.28234778004041294,
"eval_dim_512_cosine_recall@3": 0.12343316714240367,
"eval_dim_512_cosine_recall@5": 0.1874852461853036,
"eval_dim_64_cosine_accuracy@1": 0.2887323943661972,
"eval_dim_64_cosine_accuracy@10": 0.35147247119078107,
"eval_dim_64_cosine_accuracy@3": 0.29769526248399486,
"eval_dim_64_cosine_accuracy@5": 0.3213828425096031,
"eval_dim_64_cosine_map@100": 0.36977389907621383,
"eval_dim_64_cosine_mrr@10": 0.3001796130317253,
"eval_dim_64_cosine_ndcg@10": 0.3121069409877711,
"eval_dim_64_cosine_precision@1": 0.2887323943661972,
"eval_dim_64_cosine_precision@10": 0.25448143405889884,
"eval_dim_64_cosine_precision@3": 0.28958600085360653,
"eval_dim_64_cosine_precision@5": 0.2827144686299616,
"eval_dim_64_cosine_recall@1": 0.03545580301027414,
"eval_dim_64_cosine_recall@10": 0.23272201386526145,
"eval_dim_64_cosine_recall@3": 0.10416703458132823,
"eval_dim_64_cosine_recall@5": 0.15778648755468183,
"eval_dim_768_cosine_accuracy@1": 0.39500640204865556,
"eval_dim_768_cosine_accuracy@10": 0.4667093469910371,
"eval_dim_768_cosine_accuracy@3": 0.4039692701664533,
"eval_dim_768_cosine_accuracy@5": 0.4327784891165173,
"eval_dim_768_cosine_map@100": 0.48082492057308085,
"eval_dim_768_cosine_mrr@10": 0.40793625388695776,
"eval_dim_768_cosine_ndcg@10": 0.4221720847747187,
"eval_dim_768_cosine_precision@1": 0.39500640204865556,
"eval_dim_768_cosine_precision@10": 0.3571702944942382,
"eval_dim_768_cosine_precision@3": 0.39564660691421255,
"eval_dim_768_cosine_precision@5": 0.3882202304737516,
"eval_dim_768_cosine_recall@1": 0.04243158779272862,
"eval_dim_768_cosine_recall@10": 0.289899399385322,
"eval_dim_768_cosine_recall@3": 0.1245655383778341,
"eval_dim_768_cosine_recall@5": 0.1902548634299299,
"eval_runtime": 183.3777,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3121069409877711,
"eval_steps_per_second": 0.0,
"step": 196
},
{
"epoch": 2.010204081632653,
"grad_norm": 19.43300437927246,
"learning_rate": 2e-05,
"loss": 0.768,
"step": 197
},
{
"epoch": 2.020408163265306,
"grad_norm": 0.857424259185791,
"learning_rate": 1.9999984141121447e-05,
"loss": 0.0073,
"step": 198
},
{
"epoch": 2.0306122448979593,
"grad_norm": 126.48216247558594,
"learning_rate": 1.9999936564536085e-05,
"loss": 1.6622,
"step": 199
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.030239321291446686,
"learning_rate": 1.9999857270394818e-05,
"loss": 0.0003,
"step": 200
},
{
"epoch": 2.0510204081632653,
"grad_norm": 3.846285343170166,
"learning_rate": 1.9999746258949146e-05,
"loss": 0.0398,
"step": 201
},
{
"epoch": 2.061224489795918,
"grad_norm": 0.007493811659514904,
"learning_rate": 1.9999603530551178e-05,
"loss": 0.0001,
"step": 202
},
{
"epoch": 2.0714285714285716,
"grad_norm": 33.33332061767578,
"learning_rate": 1.999942908565361e-05,
"loss": 0.3767,
"step": 203
},
{
"epoch": 2.0816326530612246,
"grad_norm": 25.518348693847656,
"learning_rate": 1.999922292480975e-05,
"loss": 0.4468,
"step": 204
},
{
"epoch": 2.0918367346938775,
"grad_norm": 10.444570541381836,
"learning_rate": 1.9998985048673486e-05,
"loss": 0.1021,
"step": 205
},
{
"epoch": 2.1020408163265305,
"grad_norm": 106.1369857788086,
"learning_rate": 1.9998715457999313e-05,
"loss": 1.5802,
"step": 206
},
{
"epoch": 2.1122448979591835,
"grad_norm": 18.592628479003906,
"learning_rate": 1.999841415364231e-05,
"loss": 0.1798,
"step": 207
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.11251077055931091,
"learning_rate": 1.999808113655815e-05,
"loss": 0.0015,
"step": 208
},
{
"epoch": 2.13265306122449,
"grad_norm": 0.73614501953125,
"learning_rate": 1.999771640780308e-05,
"loss": 0.0055,
"step": 209
},
{
"epoch": 2.142857142857143,
"grad_norm": 54.640098571777344,
"learning_rate": 1.999731996853395e-05,
"loss": 0.6201,
"step": 210
},
{
"epoch": 2.1530612244897958,
"grad_norm": 79.83525848388672,
"learning_rate": 1.9996891820008165e-05,
"loss": 1.263,
"step": 211
},
{
"epoch": 2.163265306122449,
"grad_norm": 1.2913179397583008,
"learning_rate": 1.9996431963583724e-05,
"loss": 0.0194,
"step": 212
},
{
"epoch": 2.173469387755102,
"grad_norm": 0.05232042446732521,
"learning_rate": 1.9995940400719184e-05,
"loss": 0.0005,
"step": 213
},
{
"epoch": 2.183673469387755,
"grad_norm": 247.92056274414062,
"learning_rate": 1.9995417132973674e-05,
"loss": 10.7772,
"step": 214
},
{
"epoch": 2.193877551020408,
"grad_norm": 79.94287872314453,
"learning_rate": 1.999486216200688e-05,
"loss": 1.4789,
"step": 215
},
{
"epoch": 2.204081632653061,
"grad_norm": 31.259197235107422,
"learning_rate": 1.999427548957905e-05,
"loss": 0.3912,
"step": 216
},
{
"epoch": 2.2142857142857144,
"grad_norm": 31.51141357421875,
"learning_rate": 1.9993657117550972e-05,
"loss": 0.2786,
"step": 217
},
{
"epoch": 2.2244897959183674,
"grad_norm": 39.90089797973633,
"learning_rate": 1.9993007047883988e-05,
"loss": 0.6376,
"step": 218
},
{
"epoch": 2.2346938775510203,
"grad_norm": 0.3418372869491577,
"learning_rate": 1.999232528263997e-05,
"loss": 0.0059,
"step": 219
},
{
"epoch": 2.2448979591836733,
"grad_norm": 74.24694061279297,
"learning_rate": 1.9991611823981322e-05,
"loss": 1.3822,
"step": 220
},
{
"epoch": 2.2551020408163267,
"grad_norm": 101.86847686767578,
"learning_rate": 1.9990866674170984e-05,
"loss": 1.2364,
"step": 221
},
{
"epoch": 2.2653061224489797,
"grad_norm": 160.41319274902344,
"learning_rate": 1.99900898355724e-05,
"loss": 2.8296,
"step": 222
},
{
"epoch": 2.2755102040816326,
"grad_norm": 42.205142974853516,
"learning_rate": 1.9989281310649516e-05,
"loss": 0.47,
"step": 223
},
{
"epoch": 2.2857142857142856,
"grad_norm": 78.7785415649414,
"learning_rate": 1.9988441101966807e-05,
"loss": 1.2266,
"step": 224
},
{
"epoch": 2.295918367346939,
"grad_norm": 1.0640811920166016,
"learning_rate": 1.9987569212189224e-05,
"loss": 0.0115,
"step": 225
},
{
"epoch": 2.306122448979592,
"grad_norm": 3.0871458053588867,
"learning_rate": 1.9986665644082204e-05,
"loss": 0.017,
"step": 226
},
{
"epoch": 2.316326530612245,
"grad_norm": 2.008272647857666,
"learning_rate": 1.9985730400511658e-05,
"loss": 0.0165,
"step": 227
},
{
"epoch": 2.326530612244898,
"grad_norm": 7.105755805969238,
"learning_rate": 1.998476348444397e-05,
"loss": 0.0807,
"step": 228
},
{
"epoch": 2.336734693877551,
"grad_norm": 42.1352424621582,
"learning_rate": 1.998376489894599e-05,
"loss": 0.3864,
"step": 229
},
{
"epoch": 2.3469387755102042,
"grad_norm": 11.61186408996582,
"learning_rate": 1.9982734647184997e-05,
"loss": 0.2179,
"step": 230
},
{
"epoch": 2.357142857142857,
"grad_norm": 380.5256042480469,
"learning_rate": 1.998167273242872e-05,
"loss": 9.596,
"step": 231
},
{
"epoch": 2.36734693877551,
"grad_norm": 219.15957641601562,
"learning_rate": 1.9980579158045322e-05,
"loss": 3.8921,
"step": 232
},
{
"epoch": 2.377551020408163,
"grad_norm": 7.717984676361084,
"learning_rate": 1.9979453927503366e-05,
"loss": 0.0677,
"step": 233
},
{
"epoch": 2.387755102040816,
"grad_norm": 1.607508897781372,
"learning_rate": 1.9978297044371834e-05,
"loss": 0.0184,
"step": 234
},
{
"epoch": 2.3979591836734695,
"grad_norm": 11.009926795959473,
"learning_rate": 1.9977108512320103e-05,
"loss": 0.1947,
"step": 235
},
{
"epoch": 2.4081632653061225,
"grad_norm": 43.153709411621094,
"learning_rate": 1.9975888335117927e-05,
"loss": 0.5775,
"step": 236
},
{
"epoch": 2.4183673469387754,
"grad_norm": 27.642406463623047,
"learning_rate": 1.9974636516635436e-05,
"loss": 0.1769,
"step": 237
},
{
"epoch": 2.4285714285714284,
"grad_norm": 1.5701595544815063,
"learning_rate": 1.9973353060843118e-05,
"loss": 0.0112,
"step": 238
},
{
"epoch": 2.438775510204082,
"grad_norm": 312.0637512207031,
"learning_rate": 1.9972037971811802e-05,
"loss": 9.3438,
"step": 239
},
{
"epoch": 2.4489795918367347,
"grad_norm": 8.197367668151855,
"learning_rate": 1.9970691253712663e-05,
"loss": 0.092,
"step": 240
},
{
"epoch": 2.4591836734693877,
"grad_norm": 38.18027114868164,
"learning_rate": 1.9969312910817183e-05,
"loss": 0.8527,
"step": 241
},
{
"epoch": 2.4693877551020407,
"grad_norm": 16.030864715576172,
"learning_rate": 1.9967902947497158e-05,
"loss": 0.1134,
"step": 242
},
{
"epoch": 2.479591836734694,
"grad_norm": 0.02204311266541481,
"learning_rate": 1.9966461368224676e-05,
"loss": 0.0002,
"step": 243
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.6758233904838562,
"learning_rate": 1.9964988177572106e-05,
"loss": 0.0092,
"step": 244
},
{
"epoch": 2.5,
"grad_norm": 0.1679491549730301,
"learning_rate": 1.996348338021207e-05,
"loss": 0.002,
"step": 245
},
{
"epoch": 2.510204081632653,
"grad_norm": 124.84944152832031,
"learning_rate": 1.9961946980917457e-05,
"loss": 9.4742,
"step": 246
},
{
"epoch": 2.520408163265306,
"grad_norm": 304.3159484863281,
"learning_rate": 1.9960378984561377e-05,
"loss": 8.5164,
"step": 247
},
{
"epoch": 2.5306122448979593,
"grad_norm": 137.6317138671875,
"learning_rate": 1.9958779396117162e-05,
"loss": 2.4357,
"step": 248
},
{
"epoch": 2.5408163265306123,
"grad_norm": 88.31053161621094,
"learning_rate": 1.9957148220658348e-05,
"loss": 1.1891,
"step": 249
},
{
"epoch": 2.5510204081632653,
"grad_norm": 270.725341796875,
"learning_rate": 1.9955485463358655e-05,
"loss": 4.1178,
"step": 250
},
{
"epoch": 2.561224489795918,
"grad_norm": 0.07664936035871506,
"learning_rate": 1.9953791129491985e-05,
"loss": 0.001,
"step": 251
},
{
"epoch": 2.571428571428571,
"grad_norm": 17.77437400817871,
"learning_rate": 1.9952065224432376e-05,
"loss": 0.1828,
"step": 252
},
{
"epoch": 2.5816326530612246,
"grad_norm": 208.72927856445312,
"learning_rate": 1.9950307753654016e-05,
"loss": 4.9505,
"step": 253
},
{
"epoch": 2.5918367346938775,
"grad_norm": 77.77067565917969,
"learning_rate": 1.9948518722731208e-05,
"loss": 0.8772,
"step": 254
},
{
"epoch": 2.6020408163265305,
"grad_norm": 4.941845893859863,
"learning_rate": 1.9946698137338357e-05,
"loss": 0.054,
"step": 255
},
{
"epoch": 2.612244897959184,
"grad_norm": 79.66358947753906,
"learning_rate": 1.994484600324995e-05,
"loss": 1.2223,
"step": 256
},
{
"epoch": 2.622448979591837,
"grad_norm": 49.39836120605469,
"learning_rate": 1.994296232634054e-05,
"loss": 0.5202,
"step": 257
},
{
"epoch": 2.63265306122449,
"grad_norm": 0.2510567009449005,
"learning_rate": 1.994104711258473e-05,
"loss": 0.002,
"step": 258
},
{
"epoch": 2.642857142857143,
"grad_norm": 0.1556750386953354,
"learning_rate": 1.9939100368057144e-05,
"loss": 0.0017,
"step": 259
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.1744566559791565,
"learning_rate": 1.9937122098932428e-05,
"loss": 0.0026,
"step": 260
},
{
"epoch": 2.663265306122449,
"grad_norm": 64.05052947998047,
"learning_rate": 1.99351123114852e-05,
"loss": 0.4856,
"step": 261
},
{
"epoch": 2.673469387755102,
"grad_norm": 0.5290594100952148,
"learning_rate": 1.993307101209006e-05,
"loss": 0.0067,
"step": 262
},
{
"epoch": 2.683673469387755,
"grad_norm": 105.92308044433594,
"learning_rate": 1.993099820722155e-05,
"loss": 1.2193,
"step": 263
},
{
"epoch": 2.693877551020408,
"grad_norm": 77.6264877319336,
"learning_rate": 1.992889390345414e-05,
"loss": 2.4912,
"step": 264
},
{
"epoch": 2.704081632653061,
"grad_norm": 0.30245572328567505,
"learning_rate": 1.9926758107462208e-05,
"loss": 0.0031,
"step": 265
},
{
"epoch": 2.7142857142857144,
"grad_norm": 41.0025749206543,
"learning_rate": 1.9924590826020027e-05,
"loss": 0.5973,
"step": 266
},
{
"epoch": 2.7244897959183674,
"grad_norm": 0.0627225786447525,
"learning_rate": 1.9922392066001724e-05,
"loss": 0.0007,
"step": 267
},
{
"epoch": 2.7346938775510203,
"grad_norm": 47.067935943603516,
"learning_rate": 1.992016183438127e-05,
"loss": 1.3781,
"step": 268
},
{
"epoch": 2.7448979591836737,
"grad_norm": 0.5052754878997803,
"learning_rate": 1.991790013823246e-05,
"loss": 0.0083,
"step": 269
},
{
"epoch": 2.7551020408163263,
"grad_norm": 0.01090068370103836,
"learning_rate": 1.9915606984728896e-05,
"loss": 0.0001,
"step": 270
},
{
"epoch": 2.7653061224489797,
"grad_norm": 31.127262115478516,
"learning_rate": 1.9913282381143934e-05,
"loss": 0.2631,
"step": 271
},
{
"epoch": 2.7755102040816326,
"grad_norm": 7.258132457733154,
"learning_rate": 1.99109263348507e-05,
"loss": 0.0525,
"step": 272
},
{
"epoch": 2.7857142857142856,
"grad_norm": 0.05350486561655998,
"learning_rate": 1.9908538853322046e-05,
"loss": 0.0008,
"step": 273
},
{
"epoch": 2.795918367346939,
"grad_norm": 7.694390773773193,
"learning_rate": 1.9906119944130527e-05,
"loss": 0.0738,
"step": 274
},
{
"epoch": 2.806122448979592,
"grad_norm": 0.25629743933677673,
"learning_rate": 1.9903669614948382e-05,
"loss": 0.0019,
"step": 275
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.04774434119462967,
"learning_rate": 1.9901187873547504e-05,
"loss": 0.0008,
"step": 276
},
{
"epoch": 2.826530612244898,
"grad_norm": 29.360733032226562,
"learning_rate": 1.9898674727799418e-05,
"loss": 0.4261,
"step": 277
},
{
"epoch": 2.836734693877551,
"grad_norm": 0.4614088535308838,
"learning_rate": 1.9896130185675263e-05,
"loss": 0.0072,
"step": 278
},
{
"epoch": 2.8469387755102042,
"grad_norm": 59.63426971435547,
"learning_rate": 1.9893554255245748e-05,
"loss": 1.9606,
"step": 279
},
{
"epoch": 2.857142857142857,
"grad_norm": 2.8956282138824463,
"learning_rate": 1.9890946944681157e-05,
"loss": 0.0348,
"step": 280
},
{
"epoch": 2.86734693877551,
"grad_norm": 196.1266326904297,
"learning_rate": 1.9888308262251286e-05,
"loss": 0.1742,
"step": 281
},
{
"epoch": 2.877551020408163,
"grad_norm": 0.15560828149318695,
"learning_rate": 1.988563821632545e-05,
"loss": 0.0018,
"step": 282
},
{
"epoch": 2.887755102040816,
"grad_norm": 18.213293075561523,
"learning_rate": 1.9882936815372432e-05,
"loss": 0.3129,
"step": 283
},
{
"epoch": 2.8979591836734695,
"grad_norm": 93.50361633300781,
"learning_rate": 1.9880204067960473e-05,
"loss": 0.3552,
"step": 284
},
{
"epoch": 2.9081632653061225,
"grad_norm": 88.30142974853516,
"learning_rate": 1.9877439982757228e-05,
"loss": 1.901,
"step": 285
},
{
"epoch": 2.9183673469387754,
"grad_norm": 14.138421058654785,
"learning_rate": 1.9874644568529763e-05,
"loss": 0.1566,
"step": 286
},
{
"epoch": 2.928571428571429,
"grad_norm": 1.8858366012573242,
"learning_rate": 1.9871817834144506e-05,
"loss": 0.0247,
"step": 287
},
{
"epoch": 2.938775510204082,
"grad_norm": 0.07699646055698395,
"learning_rate": 1.9868959788567213e-05,
"loss": 0.0009,
"step": 288
},
{
"epoch": 2.9489795918367347,
"grad_norm": 0.006560923531651497,
"learning_rate": 1.9866070440862977e-05,
"loss": 0.0001,
"step": 289
},
{
"epoch": 2.9591836734693877,
"grad_norm": 0.0304582379758358,
"learning_rate": 1.9863149800196152e-05,
"loss": 0.0004,
"step": 290
},
{
"epoch": 2.9693877551020407,
"grad_norm": 2.932767868041992,
"learning_rate": 1.9860197875830355e-05,
"loss": 0.0262,
"step": 291
},
{
"epoch": 2.979591836734694,
"grad_norm": 5.625049591064453,
"learning_rate": 1.9857214677128436e-05,
"loss": 0.0334,
"step": 292
},
{
"epoch": 2.989795918367347,
"grad_norm": 1.7222120761871338,
"learning_rate": 1.9854200213552426e-05,
"loss": 0.0146,
"step": 293
},
{
"epoch": 3.0,
"grad_norm": 0.34878501296043396,
"learning_rate": 1.985115449466353e-05,
"loss": 0.0044,
"step": 294
},
{
"epoch": 3.0,
"eval_dim_128_cosine_accuracy@1": 0.353393085787452,
"eval_dim_128_cosine_accuracy@10": 0.4186939820742638,
"eval_dim_128_cosine_accuracy@3": 0.3617157490396927,
"eval_dim_128_cosine_accuracy@5": 0.3879641485275288,
"eval_dim_128_cosine_map@100": 0.44283999281955866,
"eval_dim_128_cosine_mrr@10": 0.3652353515029568,
"eval_dim_128_cosine_ndcg@10": 0.3779175151946445,
"eval_dim_128_cosine_precision@1": 0.353393085787452,
"eval_dim_128_cosine_precision@10": 0.3151728553137004,
"eval_dim_128_cosine_precision@3": 0.3542466922748612,
"eval_dim_128_cosine_precision@5": 0.34609475032010245,
"eval_dim_128_cosine_recall@1": 0.03978744960200595,
"eval_dim_128_cosine_recall@10": 0.26555745666889424,
"eval_dim_128_cosine_recall@3": 0.117127816905993,
"eval_dim_128_cosine_recall@5": 0.17767259683097839,
"eval_dim_256_cosine_accuracy@1": 0.3905249679897567,
"eval_dim_256_cosine_accuracy@10": 0.45966709346991036,
"eval_dim_256_cosine_accuracy@3": 0.3994878361075544,
"eval_dim_256_cosine_accuracy@5": 0.4263764404609475,
"eval_dim_256_cosine_map@100": 0.4797247358039393,
"eval_dim_256_cosine_mrr@10": 0.4030391947645464,
"eval_dim_256_cosine_ndcg@10": 0.41529982696069817,
"eval_dim_256_cosine_precision@1": 0.3905249679897567,
"eval_dim_256_cosine_precision@10": 0.3473111395646607,
"eval_dim_256_cosine_precision@3": 0.39137857447716595,
"eval_dim_256_cosine_precision@5": 0.3824583866837389,
"eval_dim_256_cosine_recall@1": 0.04379643457341558,
"eval_dim_256_cosine_recall@10": 0.2849589544480218,
"eval_dim_256_cosine_recall@3": 0.12838062817408333,
"eval_dim_256_cosine_recall@5": 0.19365867195858333,
"eval_dim_512_cosine_accuracy@1": 0.41165172855313703,
"eval_dim_512_cosine_accuracy@10": 0.4820742637644046,
"eval_dim_512_cosine_accuracy@3": 0.4206145966709347,
"eval_dim_512_cosine_accuracy@5": 0.4513444302176697,
"eval_dim_512_cosine_map@100": 0.5006190134140577,
"eval_dim_512_cosine_mrr@10": 0.42457192650854575,
"eval_dim_512_cosine_ndcg@10": 0.43828991743977486,
"eval_dim_512_cosine_precision@1": 0.41165172855313703,
"eval_dim_512_cosine_precision@10": 0.37048655569782335,
"eval_dim_512_cosine_precision@3": 0.41229193341869397,
"eval_dim_512_cosine_precision@5": 0.404225352112676,
"eval_dim_512_cosine_recall@1": 0.04453915204812793,
"eval_dim_512_cosine_recall@10": 0.2976368767832588,
"eval_dim_512_cosine_recall@3": 0.13075976756926563,
"eval_dim_512_cosine_recall@5": 0.19871584447147544,
"eval_dim_64_cosine_accuracy@1": 0.31241997439180536,
"eval_dim_64_cosine_accuracy@10": 0.3674775928297055,
"eval_dim_64_cosine_accuracy@3": 0.3181818181818182,
"eval_dim_64_cosine_accuracy@5": 0.33674775928297057,
"eval_dim_64_cosine_map@100": 0.3932871188436923,
"eval_dim_64_cosine_mrr@10": 0.32184292624433436,
"eval_dim_64_cosine_ndcg@10": 0.33149908528792255,
"eval_dim_64_cosine_precision@1": 0.31241997439180536,
"eval_dim_64_cosine_precision@10": 0.27151088348271446,
"eval_dim_64_cosine_precision@3": 0.31241997439180536,
"eval_dim_64_cosine_precision@5": 0.3029449423815621,
"eval_dim_64_cosine_recall@1": 0.03691005165108737,
"eval_dim_64_cosine_recall@10": 0.23779183482125327,
"eval_dim_64_cosine_recall@3": 0.1083213328481372,
"eval_dim_64_cosine_recall@5": 0.16275808863008476,
"eval_dim_768_cosine_accuracy@1": 0.4199743918053777,
"eval_dim_768_cosine_accuracy@10": 0.49551856594110116,
"eval_dim_768_cosine_accuracy@3": 0.42893725992317544,
"eval_dim_768_cosine_accuracy@5": 0.4622279129321383,
"eval_dim_768_cosine_map@100": 0.5084691302256611,
"eval_dim_768_cosine_mrr@10": 0.43367096518504944,
"eval_dim_768_cosine_ndcg@10": 0.44799498237634766,
"eval_dim_768_cosine_precision@1": 0.4199743918053777,
"eval_dim_768_cosine_precision@10": 0.3802176696542894,
"eval_dim_768_cosine_precision@3": 0.42040119504908235,
"eval_dim_768_cosine_precision@5": 0.41254801536491675,
"eval_dim_768_cosine_recall@1": 0.04510157469618885,
"eval_dim_768_cosine_recall@10": 0.30249531360222554,
"eval_dim_768_cosine_recall@3": 0.13229329889404273,
"eval_dim_768_cosine_recall@5": 0.2009019928625879,
"eval_runtime": 184.2103,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.33149908528792255,
"eval_steps_per_second": 0.0,
"step": 294
},
{
"epoch": 3.010204081632653,
"grad_norm": 22.25138282775879,
"learning_rate": 1.9848077530122083e-05,
"loss": 0.2686,
"step": 295
},
{
"epoch": 3.020408163265306,
"grad_norm": 0.13433043658733368,
"learning_rate": 1.9844969329687526e-05,
"loss": 0.0008,
"step": 296
},
{
"epoch": 3.0306122448979593,
"grad_norm": 0.8659433126449585,
"learning_rate": 1.9841829903218377e-05,
"loss": 0.0106,
"step": 297
},
{
"epoch": 3.0408163265306123,
"grad_norm": 5.649078845977783,
"learning_rate": 1.983865926067219e-05,
"loss": 0.0551,
"step": 298
},
{
"epoch": 3.0510204081632653,
"grad_norm": 179.68861389160156,
"learning_rate": 1.983545741210553e-05,
"loss": 1.2816,
"step": 299
},
{
"epoch": 3.061224489795918,
"grad_norm": 0.24586763978004456,
"learning_rate": 1.9832224367673945e-05,
"loss": 0.002,
"step": 300
},
{
"epoch": 3.0714285714285716,
"grad_norm": 2.8069469928741455,
"learning_rate": 1.9828960137631927e-05,
"loss": 0.0406,
"step": 301
},
{
"epoch": 3.0816326530612246,
"grad_norm": 0.5202066898345947,
"learning_rate": 1.9825664732332886e-05,
"loss": 0.0081,
"step": 302
},
{
"epoch": 3.0918367346938775,
"grad_norm": 0.8059203028678894,
"learning_rate": 1.98223381622291e-05,
"loss": 0.0064,
"step": 303
},
{
"epoch": 3.1020408163265305,
"grad_norm": 0.45694583654403687,
"learning_rate": 1.9818980437871707e-05,
"loss": 0.0061,
"step": 304
},
{
"epoch": 3.1122448979591835,
"grad_norm": 29.932289123535156,
"learning_rate": 1.9815591569910654e-05,
"loss": 0.4775,
"step": 305
},
{
"epoch": 3.122448979591837,
"grad_norm": 37.35374069213867,
"learning_rate": 1.9812171569094675e-05,
"loss": 0.3185,
"step": 306
},
{
"epoch": 3.13265306122449,
"grad_norm": 0.9324603080749512,
"learning_rate": 1.980872044627124e-05,
"loss": 0.0105,
"step": 307
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.01630260981619358,
"learning_rate": 1.980523821238654e-05,
"loss": 0.0001,
"step": 308
},
{
"epoch": 3.1530612244897958,
"grad_norm": 368.7991638183594,
"learning_rate": 1.9801724878485438e-05,
"loss": 10.5217,
"step": 309
},
{
"epoch": 3.163265306122449,
"grad_norm": 0.37166470289230347,
"learning_rate": 1.9798180455711445e-05,
"loss": 0.0041,
"step": 310
},
{
"epoch": 3.173469387755102,
"grad_norm": 12.572883605957031,
"learning_rate": 1.9794604955306668e-05,
"loss": 0.1077,
"step": 311
},
{
"epoch": 3.183673469387755,
"grad_norm": 6.274280071258545,
"learning_rate": 1.97909983886118e-05,
"loss": 0.0984,
"step": 312
},
{
"epoch": 3.193877551020408,
"grad_norm": 3.7283997535705566,
"learning_rate": 1.9787360767066054e-05,
"loss": 0.0279,
"step": 313
},
{
"epoch": 3.204081632653061,
"grad_norm": 0.09550534188747406,
"learning_rate": 1.9783692102207156e-05,
"loss": 0.0009,
"step": 314
},
{
"epoch": 3.2142857142857144,
"grad_norm": 9.320639610290527,
"learning_rate": 1.9779992405671284e-05,
"loss": 0.1379,
"step": 315
},
{
"epoch": 3.2244897959183674,
"grad_norm": 0.0006459120777435601,
"learning_rate": 1.977626168919305e-05,
"loss": 0.0,
"step": 316
},
{
"epoch": 3.2346938775510203,
"grad_norm": 0.02275056019425392,
"learning_rate": 1.977249996460544e-05,
"loss": 0.0003,
"step": 317
},
{
"epoch": 3.2448979591836733,
"grad_norm": 12.579317092895508,
"learning_rate": 1.976870724383981e-05,
"loss": 0.0852,
"step": 318
},
{
"epoch": 3.2551020408163267,
"grad_norm": 0.0938275083899498,
"learning_rate": 1.9764883538925822e-05,
"loss": 0.0015,
"step": 319
},
{
"epoch": 3.2653061224489797,
"grad_norm": 0.10724499076604843,
"learning_rate": 1.9761028861991406e-05,
"loss": 0.0011,
"step": 320
},
{
"epoch": 3.2755102040816326,
"grad_norm": 0.0515306331217289,
"learning_rate": 1.975714322526273e-05,
"loss": 0.0006,
"step": 321
},
{
"epoch": 3.2857142857142856,
"grad_norm": 61.047203063964844,
"learning_rate": 1.9753226641064164e-05,
"loss": 1.2658,
"step": 322
},
{
"epoch": 3.295918367346939,
"grad_norm": 8.448295593261719,
"learning_rate": 1.9749279121818235e-05,
"loss": 0.0457,
"step": 323
},
{
"epoch": 3.306122448979592,
"grad_norm": 1.0811445713043213,
"learning_rate": 1.974530068004559e-05,
"loss": 0.0111,
"step": 324
},
{
"epoch": 3.316326530612245,
"grad_norm": 43.832454681396484,
"learning_rate": 1.9741291328364955e-05,
"loss": 1.0571,
"step": 325
},
{
"epoch": 3.326530612244898,
"grad_norm": 0.005959355738013983,
"learning_rate": 1.973725107949309e-05,
"loss": 0.0001,
"step": 326
},
{
"epoch": 3.336734693877551,
"grad_norm": 0.15442679822444916,
"learning_rate": 1.973317994624476e-05,
"loss": 0.0014,
"step": 327
},
{
"epoch": 3.3469387755102042,
"grad_norm": 44.03955078125,
"learning_rate": 1.9729077941532687e-05,
"loss": 0.3352,
"step": 328
},
{
"epoch": 3.357142857142857,
"grad_norm": 66.37464141845703,
"learning_rate": 1.9724945078367513e-05,
"loss": 1.3782,
"step": 329
},
{
"epoch": 3.36734693877551,
"grad_norm": 3.3627383708953857,
"learning_rate": 1.9720781369857747e-05,
"loss": 0.008,
"step": 330
},
{
"epoch": 3.377551020408163,
"grad_norm": 0.06466995179653168,
"learning_rate": 1.9716586829209743e-05,
"loss": 0.0007,
"step": 331
},
{
"epoch": 3.387755102040816,
"grad_norm": 0.16937388479709625,
"learning_rate": 1.971236146972764e-05,
"loss": 0.0018,
"step": 332
},
{
"epoch": 3.3979591836734695,
"grad_norm": 26.774580001831055,
"learning_rate": 1.9708105304813333e-05,
"loss": 0.1579,
"step": 333
},
{
"epoch": 3.4081632653061225,
"grad_norm": 36.40068054199219,
"learning_rate": 1.970381834796642e-05,
"loss": 0.3014,
"step": 334
},
{
"epoch": 3.4183673469387754,
"grad_norm": 12.088098526000977,
"learning_rate": 1.969950061278417e-05,
"loss": 0.0626,
"step": 335
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.4224468469619751,
"learning_rate": 1.969515211296147e-05,
"loss": 0.0074,
"step": 336
},
{
"epoch": 3.438775510204082,
"grad_norm": 0.3234362006187439,
"learning_rate": 1.969077286229078e-05,
"loss": 0.002,
"step": 337
},
{
"epoch": 3.4489795918367347,
"grad_norm": 0.7104203701019287,
"learning_rate": 1.968636287466211e-05,
"loss": 0.0047,
"step": 338
},
{
"epoch": 3.4591836734693877,
"grad_norm": 4.555932521820068,
"learning_rate": 1.9681922164062945e-05,
"loss": 0.0601,
"step": 339
},
{
"epoch": 3.4693877551020407,
"grad_norm": 1.1395331621170044,
"learning_rate": 1.967745074457823e-05,
"loss": 0.0119,
"step": 340
},
{
"epoch": 3.479591836734694,
"grad_norm": 0.044391512870788574,
"learning_rate": 1.9672948630390296e-05,
"loss": 0.0003,
"step": 341
},
{
"epoch": 3.489795918367347,
"grad_norm": 1.9847981929779053,
"learning_rate": 1.9668415835778845e-05,
"loss": 0.0319,
"step": 342
},
{
"epoch": 3.5,
"grad_norm": 1.8022907972335815,
"learning_rate": 1.9663852375120882e-05,
"loss": 0.024,
"step": 343
},
{
"epoch": 3.510204081632653,
"grad_norm": 0.36603665351867676,
"learning_rate": 1.9659258262890683e-05,
"loss": 0.0034,
"step": 344
},
{
"epoch": 3.520408163265306,
"grad_norm": 20.221256256103516,
"learning_rate": 1.9654633513659743e-05,
"loss": 0.1909,
"step": 345
},
{
"epoch": 3.5306122448979593,
"grad_norm": 4.926142692565918,
"learning_rate": 1.9649978142096726e-05,
"loss": 0.08,
"step": 346
},
{
"epoch": 3.5408163265306123,
"grad_norm": 0.02573891542851925,
"learning_rate": 1.9645292162967426e-05,
"loss": 0.0003,
"step": 347
},
{
"epoch": 3.5510204081632653,
"grad_norm": 3.7188498973846436,
"learning_rate": 1.964057559113472e-05,
"loss": 0.0396,
"step": 348
},
{
"epoch": 3.561224489795918,
"grad_norm": 1.5656611919403076,
"learning_rate": 1.9635828441558515e-05,
"loss": 0.0127,
"step": 349
},
{
"epoch": 3.571428571428571,
"grad_norm": 1.1211833953857422,
"learning_rate": 1.9631050729295705e-05,
"loss": 0.0146,
"step": 350
},
{
"epoch": 3.5816326530612246,
"grad_norm": 14.682442665100098,
"learning_rate": 1.962624246950012e-05,
"loss": 0.0916,
"step": 351
},
{
"epoch": 3.5918367346938775,
"grad_norm": 9.328814506530762,
"learning_rate": 1.9621403677422487e-05,
"loss": 0.075,
"step": 352
},
{
"epoch": 3.6020408163265305,
"grad_norm": 0.16812823712825775,
"learning_rate": 1.9616534368410364e-05,
"loss": 0.0012,
"step": 353
},
{
"epoch": 3.612244897959184,
"grad_norm": 21.99177360534668,
"learning_rate": 1.961163455790811e-05,
"loss": 0.4742,
"step": 354
},
{
"epoch": 3.622448979591837,
"grad_norm": 0.015136899426579475,
"learning_rate": 1.960670426145682e-05,
"loss": 0.0002,
"step": 355
},
{
"epoch": 3.63265306122449,
"grad_norm": 4.217746257781982,
"learning_rate": 1.9601743494694295e-05,
"loss": 0.0332,
"step": 356
},
{
"epoch": 3.642857142857143,
"grad_norm": 8.988768577575684,
"learning_rate": 1.959675227335497e-05,
"loss": 0.1531,
"step": 357
},
{
"epoch": 3.6530612244897958,
"grad_norm": 0.8388896584510803,
"learning_rate": 1.9591730613269878e-05,
"loss": 0.0094,
"step": 358
},
{
"epoch": 3.663265306122449,
"grad_norm": 1.1567710638046265,
"learning_rate": 1.9586678530366607e-05,
"loss": 0.0141,
"step": 359
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.5974036455154419,
"learning_rate": 1.9581596040669225e-05,
"loss": 0.005,
"step": 360
},
{
"epoch": 3.683673469387755,
"grad_norm": 2.6495542526245117,
"learning_rate": 1.9576483160298246e-05,
"loss": 0.0292,
"step": 361
},
{
"epoch": 3.693877551020408,
"grad_norm": 9.135188102722168,
"learning_rate": 1.9571339905470587e-05,
"loss": 0.0856,
"step": 362
},
{
"epoch": 3.704081632653061,
"grad_norm": 34.383392333984375,
"learning_rate": 1.9566166292499497e-05,
"loss": 0.5175,
"step": 363
},
{
"epoch": 3.7142857142857144,
"grad_norm": 64.9215087890625,
"learning_rate": 1.956096233779451e-05,
"loss": 0.7858,
"step": 364
},
{
"epoch": 3.7244897959183674,
"grad_norm": 3.036076545715332,
"learning_rate": 1.955572805786141e-05,
"loss": 0.0228,
"step": 365
},
{
"epoch": 3.7346938775510203,
"grad_norm": 0.060048237442970276,
"learning_rate": 1.9550463469302156e-05,
"loss": 0.0007,
"step": 366
},
{
"epoch": 3.7448979591836737,
"grad_norm": 14.525885581970215,
"learning_rate": 1.954516858881484e-05,
"loss": 0.1121,
"step": 367
},
{
"epoch": 3.7551020408163263,
"grad_norm": 0.025440210476517677,
"learning_rate": 1.953984343319364e-05,
"loss": 0.0003,
"step": 368
},
{
"epoch": 3.7653061224489797,
"grad_norm": 16.654895782470703,
"learning_rate": 1.953448801932875e-05,
"loss": 0.1813,
"step": 369
},
{
"epoch": 3.7755102040816326,
"grad_norm": 1.6078295707702637,
"learning_rate": 1.952910236420635e-05,
"loss": 0.0109,
"step": 370
},
{
"epoch": 3.7857142857142856,
"grad_norm": 0.4291606545448303,
"learning_rate": 1.9523686484908523e-05,
"loss": 0.0042,
"step": 371
},
{
"epoch": 3.795918367346939,
"grad_norm": 0.01769206114113331,
"learning_rate": 1.9518240398613226e-05,
"loss": 0.0002,
"step": 372
},
{
"epoch": 3.806122448979592,
"grad_norm": 6.454762935638428,
"learning_rate": 1.951276412259422e-05,
"loss": 0.0645,
"step": 373
},
{
"epoch": 3.816326530612245,
"grad_norm": 0.010405894368886948,
"learning_rate": 1.950725767422103e-05,
"loss": 0.0001,
"step": 374
},
{
"epoch": 3.826530612244898,
"grad_norm": 0.08737888187170029,
"learning_rate": 1.9501721070958868e-05,
"loss": 0.0007,
"step": 375
},
{
"epoch": 3.836734693877551,
"grad_norm": 0.008461426012217999,
"learning_rate": 1.9496154330368605e-05,
"loss": 0.0001,
"step": 376
},
{
"epoch": 3.8469387755102042,
"grad_norm": 0.03447158262133598,
"learning_rate": 1.949055747010669e-05,
"loss": 0.0004,
"step": 377
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.08535359054803848,
"learning_rate": 1.9484930507925105e-05,
"loss": 0.0008,
"step": 378
},
{
"epoch": 3.86734693877551,
"grad_norm": 7.384174346923828,
"learning_rate": 1.947927346167132e-05,
"loss": 0.0635,
"step": 379
},
{
"epoch": 3.877551020408163,
"grad_norm": 0.10105583816766739,
"learning_rate": 1.9473586349288213e-05,
"loss": 0.0009,
"step": 380
},
{
"epoch": 3.887755102040816,
"grad_norm": 35.104393005371094,
"learning_rate": 1.9467869188814024e-05,
"loss": 0.9885,
"step": 381
},
{
"epoch": 3.8979591836734695,
"grad_norm": 3.9278883934020996,
"learning_rate": 1.946212199838231e-05,
"loss": 0.0363,
"step": 382
},
{
"epoch": 3.9081632653061225,
"grad_norm": 10.301095962524414,
"learning_rate": 1.945634479622187e-05,
"loss": 0.144,
"step": 383
},
{
"epoch": 3.9183673469387754,
"grad_norm": 42.96265411376953,
"learning_rate": 1.9450537600656688e-05,
"loss": 1.6117,
"step": 384
},
{
"epoch": 3.928571428571429,
"grad_norm": 52.48064041137695,
"learning_rate": 1.9444700430105892e-05,
"loss": 0.6172,
"step": 385
},
{
"epoch": 3.938775510204082,
"grad_norm": 1.2879438400268555,
"learning_rate": 1.9438833303083677e-05,
"loss": 0.0111,
"step": 386
},
{
"epoch": 3.9489795918367347,
"grad_norm": 2.2308216094970703,
"learning_rate": 1.943293623819925e-05,
"loss": 0.0106,
"step": 387
},
{
"epoch": 3.9591836734693877,
"grad_norm": 3.4741926193237305,
"learning_rate": 1.9427009254156783e-05,
"loss": 0.0252,
"step": 388
},
{
"epoch": 3.9693877551020407,
"grad_norm": 2.1992008686065674,
"learning_rate": 1.9421052369755335e-05,
"loss": 0.0249,
"step": 389
},
{
"epoch": 3.979591836734694,
"grad_norm": 6.670519828796387,
"learning_rate": 1.9415065603888813e-05,
"loss": 0.0537,
"step": 390
},
{
"epoch": 3.989795918367347,
"grad_norm": 3.5247645378112793,
"learning_rate": 1.940904897554589e-05,
"loss": 0.0229,
"step": 391
},
{
"epoch": 4.0,
"grad_norm": 0.005356221459805965,
"learning_rate": 1.940300250380996e-05,
"loss": 0.0001,
"step": 392
},
{
"epoch": 4.0,
"eval_dim_128_cosine_accuracy@1": 0.3591549295774648,
"eval_dim_128_cosine_accuracy@10": 0.42189500640204863,
"eval_dim_128_cosine_accuracy@3": 0.3674775928297055,
"eval_dim_128_cosine_accuracy@5": 0.3879641485275288,
"eval_dim_128_cosine_map@100": 0.44021886383811665,
"eval_dim_128_cosine_mrr@10": 0.37021726317500914,
"eval_dim_128_cosine_ndcg@10": 0.38125088011930397,
"eval_dim_128_cosine_precision@1": 0.3591549295774648,
"eval_dim_128_cosine_precision@10": 0.3190140845070422,
"eval_dim_128_cosine_precision@3": 0.36022193768672645,
"eval_dim_128_cosine_precision@5": 0.35134443021766965,
"eval_dim_128_cosine_recall@1": 0.03915563315840565,
"eval_dim_128_cosine_recall@10": 0.25990239457273806,
"eval_dim_128_cosine_recall@3": 0.11553418576873567,
"eval_dim_128_cosine_recall@5": 0.17491652721444478,
"eval_dim_256_cosine_accuracy@1": 0.3994878361075544,
"eval_dim_256_cosine_accuracy@10": 0.46030729833546735,
"eval_dim_256_cosine_accuracy@3": 0.4046094750320102,
"eval_dim_256_cosine_accuracy@5": 0.43021766965428937,
"eval_dim_256_cosine_map@100": 0.4793333960316136,
"eval_dim_256_cosine_mrr@10": 0.40997398532609775,
"eval_dim_256_cosine_ndcg@10": 0.42036202842456427,
"eval_dim_256_cosine_precision@1": 0.3994878361075544,
"eval_dim_256_cosine_precision@10": 0.354865556978233,
"eval_dim_256_cosine_precision@3": 0.3990610328638497,
"eval_dim_256_cosine_precision@5": 0.3897567221510883,
"eval_dim_256_cosine_recall@1": 0.042270750855143924,
"eval_dim_256_cosine_recall@10": 0.27590960778633977,
"eval_dim_256_cosine_recall@3": 0.12430640307454709,
"eval_dim_256_cosine_recall@5": 0.18776586614822713,
"eval_dim_512_cosine_accuracy@1": 0.4084507042253521,
"eval_dim_512_cosine_accuracy@10": 0.47247119078104993,
"eval_dim_512_cosine_accuracy@3": 0.4148527528809219,
"eval_dim_512_cosine_accuracy@5": 0.43982074263764404,
"eval_dim_512_cosine_map@100": 0.49048832795758607,
"eval_dim_512_cosine_mrr@10": 0.41940684511107007,
"eval_dim_512_cosine_ndcg@10": 0.4302962824455912,
"eval_dim_512_cosine_precision@1": 0.4084507042253521,
"eval_dim_512_cosine_precision@10": 0.3646606914212548,
"eval_dim_512_cosine_precision@3": 0.4082373026034998,
"eval_dim_512_cosine_precision@5": 0.3991037131882202,
"eval_dim_512_cosine_recall@1": 0.04301931947001461,
"eval_dim_512_cosine_recall@10": 0.2849750279222966,
"eval_dim_512_cosine_recall@3": 0.12607664541552657,
"eval_dim_512_cosine_recall@5": 0.19078237285443875,
"eval_dim_64_cosine_accuracy@1": 0.3111395646606914,
"eval_dim_64_cosine_accuracy@10": 0.3681177976952625,
"eval_dim_64_cosine_accuracy@3": 0.31690140845070425,
"eval_dim_64_cosine_accuracy@5": 0.3386683738796415,
"eval_dim_64_cosine_map@100": 0.3862030037114511,
"eval_dim_64_cosine_mrr@10": 0.3210490214011337,
"eval_dim_64_cosine_ndcg@10": 0.33142044222977946,
"eval_dim_64_cosine_precision@1": 0.3111395646606914,
"eval_dim_64_cosine_precision@10": 0.2779129321382843,
"eval_dim_64_cosine_precision@3": 0.3113529662825437,
"eval_dim_64_cosine_precision@5": 0.30435339308578746,
"eval_dim_64_cosine_recall@1": 0.033659624850547756,
"eval_dim_64_cosine_recall@10": 0.22914905789384257,
"eval_dim_64_cosine_recall@3": 0.09974008986215437,
"eval_dim_64_cosine_recall@5": 0.15190516916204871,
"eval_dim_768_cosine_accuracy@1": 0.4199743918053777,
"eval_dim_768_cosine_accuracy@10": 0.4910371318822023,
"eval_dim_768_cosine_accuracy@3": 0.42509603072983354,
"eval_dim_768_cosine_accuracy@5": 0.4526248399487836,
"eval_dim_768_cosine_map@100": 0.5040293251721872,
"eval_dim_768_cosine_mrr@10": 0.431798365953295,
"eval_dim_768_cosine_ndcg@10": 0.4437524319661329,
"eval_dim_768_cosine_precision@1": 0.4199743918053777,
"eval_dim_768_cosine_precision@10": 0.3774647887323943,
"eval_dim_768_cosine_precision@3": 0.4193341869398207,
"eval_dim_768_cosine_precision@5": 0.4102432778489116,
"eval_dim_768_cosine_recall@1": 0.043926224917091056,
"eval_dim_768_cosine_recall@10": 0.2942224009225615,
"eval_dim_768_cosine_recall@3": 0.12872931800473333,
"eval_dim_768_cosine_recall@5": 0.19528328675222623,
"eval_runtime": 183.3541,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.33142044222977946,
"eval_steps_per_second": 0.0,
"step": 392
},
{
"epoch": 4.010204081632653,
"grad_norm": 153.88648986816406,
"learning_rate": 1.9396926207859085e-05,
"loss": 0.2346,
"step": 393
},
{
"epoch": 4.020408163265306,
"grad_norm": 0.7419346570968628,
"learning_rate": 1.9390820106965908e-05,
"loss": 0.0079,
"step": 394
},
{
"epoch": 4.030612244897959,
"grad_norm": 0.6101226210594177,
"learning_rate": 1.9384684220497605e-05,
"loss": 0.0058,
"step": 395
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.44310683012008667,
"learning_rate": 1.9378518567915842e-05,
"loss": 0.0035,
"step": 396
},
{
"epoch": 4.051020408163265,
"grad_norm": 0.04433823004364967,
"learning_rate": 1.937232316877668e-05,
"loss": 0.0002,
"step": 397
},
{
"epoch": 4.061224489795919,
"grad_norm": 6.246387481689453,
"learning_rate": 1.9366098042730534e-05,
"loss": 0.028,
"step": 398
},
{
"epoch": 4.071428571428571,
"grad_norm": 0.004334597382694483,
"learning_rate": 1.9359843209522112e-05,
"loss": 0.0001,
"step": 399
},
{
"epoch": 4.081632653061225,
"grad_norm": 0.022266261279582977,
"learning_rate": 1.935355868899034e-05,
"loss": 0.0003,
"step": 400
},
{
"epoch": 4.091836734693878,
"grad_norm": 1.131543517112732,
"learning_rate": 1.934724450106831e-05,
"loss": 0.0121,
"step": 401
},
{
"epoch": 4.1020408163265305,
"grad_norm": 10.245617866516113,
"learning_rate": 1.934090066578321e-05,
"loss": 0.1073,
"step": 402
},
{
"epoch": 4.112244897959184,
"grad_norm": 0.15691065788269043,
"learning_rate": 1.933452720325626e-05,
"loss": 0.0012,
"step": 403
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.028827032074332237,
"learning_rate": 1.932812413370265e-05,
"loss": 0.0003,
"step": 404
},
{
"epoch": 4.13265306122449,
"grad_norm": 0.442047119140625,
"learning_rate": 1.9321691477431487e-05,
"loss": 0.0025,
"step": 405
},
{
"epoch": 4.142857142857143,
"grad_norm": 1.5630896091461182,
"learning_rate": 1.9315229254845712e-05,
"loss": 0.0097,
"step": 406
},
{
"epoch": 4.153061224489796,
"grad_norm": 1.185296893119812,
"learning_rate": 1.9308737486442045e-05,
"loss": 0.0127,
"step": 407
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.004454934503883123,
"learning_rate": 1.930221619281092e-05,
"loss": 0.0001,
"step": 408
},
{
"epoch": 4.173469387755102,
"grad_norm": 0.5739359259605408,
"learning_rate": 1.9295665394636414e-05,
"loss": 0.007,
"step": 409
},
{
"epoch": 4.183673469387755,
"grad_norm": 0.8776015639305115,
"learning_rate": 1.92890851126962e-05,
"loss": 0.0154,
"step": 410
},
{
"epoch": 4.1938775510204085,
"grad_norm": 0.010717377066612244,
"learning_rate": 1.9282475367861444e-05,
"loss": 0.0002,
"step": 411
},
{
"epoch": 4.204081632653061,
"grad_norm": 2.1066489219665527,
"learning_rate": 1.927583618109678e-05,
"loss": 0.0207,
"step": 412
},
{
"epoch": 4.214285714285714,
"grad_norm": 8.608527183532715,
"learning_rate": 1.926916757346022e-05,
"loss": 0.0682,
"step": 413
},
{
"epoch": 4.224489795918367,
"grad_norm": 11.61573314666748,
"learning_rate": 1.926246956610309e-05,
"loss": 0.1168,
"step": 414
},
{
"epoch": 4.23469387755102,
"grad_norm": 0.3539029657840729,
"learning_rate": 1.9255742180269967e-05,
"loss": 0.0019,
"step": 415
},
{
"epoch": 4.244897959183674,
"grad_norm": 95.681884765625,
"learning_rate": 1.924898543729861e-05,
"loss": 1.7119,
"step": 416
},
{
"epoch": 4.255102040816326,
"grad_norm": 0.0037097211461514235,
"learning_rate": 1.9242199358619897e-05,
"loss": 0.0001,
"step": 417
},
{
"epoch": 4.26530612244898,
"grad_norm": 0.029390670359134674,
"learning_rate": 1.923538396575774e-05,
"loss": 0.0004,
"step": 418
},
{
"epoch": 4.275510204081632,
"grad_norm": 158.73011779785156,
"learning_rate": 1.922853928032904e-05,
"loss": 3.5151,
"step": 419
},
{
"epoch": 4.285714285714286,
"grad_norm": 350.69512939453125,
"learning_rate": 1.92216653240436e-05,
"loss": 7.6674,
"step": 420
},
{
"epoch": 4.295918367346939,
"grad_norm": 94.87379455566406,
"learning_rate": 1.921476211870408e-05,
"loss": 2.1193,
"step": 421
},
{
"epoch": 4.3061224489795915,
"grad_norm": 101.0951919555664,
"learning_rate": 1.9207829686205882e-05,
"loss": 1.1982,
"step": 422
},
{
"epoch": 4.316326530612245,
"grad_norm": 0.10624423623085022,
"learning_rate": 1.920086804853714e-05,
"loss": 0.0018,
"step": 423
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.08854330331087112,
"learning_rate": 1.9193877227778604e-05,
"loss": 0.0008,
"step": 424
},
{
"epoch": 4.336734693877551,
"grad_norm": 5.69531774520874,
"learning_rate": 1.9186857246103586e-05,
"loss": 0.0581,
"step": 425
},
{
"epoch": 4.346938775510204,
"grad_norm": 2.591644287109375,
"learning_rate": 1.91798081257779e-05,
"loss": 0.0319,
"step": 426
},
{
"epoch": 4.357142857142857,
"grad_norm": 0.3840072751045227,
"learning_rate": 1.917272988915976e-05,
"loss": 0.0041,
"step": 427
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.0030958654824644327,
"learning_rate": 1.9165622558699763e-05,
"loss": 0.0,
"step": 428
},
{
"epoch": 4.377551020408164,
"grad_norm": 0.008273592218756676,
"learning_rate": 1.915848615694076e-05,
"loss": 0.0001,
"step": 429
},
{
"epoch": 4.387755102040816,
"grad_norm": 0.043385185301303864,
"learning_rate": 1.9151320706517814e-05,
"loss": 0.0005,
"step": 430
},
{
"epoch": 4.3979591836734695,
"grad_norm": 0.022923173382878304,
"learning_rate": 1.9144126230158127e-05,
"loss": 0.0002,
"step": 431
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.14256472885608673,
"learning_rate": 1.913690275068097e-05,
"loss": 0.0012,
"step": 432
},
{
"epoch": 4.418367346938775,
"grad_norm": 5.968089580535889,
"learning_rate": 1.912965029099759e-05,
"loss": 0.0395,
"step": 433
},
{
"epoch": 4.428571428571429,
"grad_norm": 0.10840898007154465,
"learning_rate": 1.9122368874111172e-05,
"loss": 0.001,
"step": 434
},
{
"epoch": 4.438775510204081,
"grad_norm": 0.07155577838420868,
"learning_rate": 1.9115058523116734e-05,
"loss": 0.0006,
"step": 435
},
{
"epoch": 4.448979591836735,
"grad_norm": 1.8904563188552856,
"learning_rate": 1.9107719261201066e-05,
"loss": 0.0262,
"step": 436
},
{
"epoch": 4.459183673469388,
"grad_norm": 97.97481536865234,
"learning_rate": 1.9100351111642666e-05,
"loss": 4.1211,
"step": 437
},
{
"epoch": 4.469387755102041,
"grad_norm": 1.083544373512268,
"learning_rate": 1.9092954097811654e-05,
"loss": 0.0119,
"step": 438
},
{
"epoch": 4.479591836734694,
"grad_norm": 0.060115616768598557,
"learning_rate": 1.908552824316969e-05,
"loss": 0.0006,
"step": 439
},
{
"epoch": 4.489795918367347,
"grad_norm": 8.608622550964355,
"learning_rate": 1.9078073571269922e-05,
"loss": 0.0865,
"step": 440
},
{
"epoch": 4.5,
"grad_norm": 0.1251484751701355,
"learning_rate": 1.90705901057569e-05,
"loss": 0.0007,
"step": 441
},
{
"epoch": 4.510204081632653,
"grad_norm": 0.1283632069826126,
"learning_rate": 1.9063077870366504e-05,
"loss": 0.0011,
"step": 442
},
{
"epoch": 4.520408163265306,
"grad_norm": 10.08908462524414,
"learning_rate": 1.9055536888925844e-05,
"loss": 0.0804,
"step": 443
},
{
"epoch": 4.530612244897959,
"grad_norm": 4.281910419464111,
"learning_rate": 1.9047967185353236e-05,
"loss": 0.0596,
"step": 444
},
{
"epoch": 4.540816326530612,
"grad_norm": 0.049331195652484894,
"learning_rate": 1.9040368783658075e-05,
"loss": 0.0006,
"step": 445
},
{
"epoch": 4.551020408163265,
"grad_norm": 0.18595637381076813,
"learning_rate": 1.903274170794079e-05,
"loss": 0.0019,
"step": 446
},
{
"epoch": 4.561224489795919,
"grad_norm": 44.52228927612305,
"learning_rate": 1.9025085982392753e-05,
"loss": 0.5596,
"step": 447
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.19094879925251007,
"learning_rate": 1.9017401631296208e-05,
"loss": 0.0018,
"step": 448
},
{
"epoch": 4.581632653061225,
"grad_norm": 3.7835676670074463,
"learning_rate": 1.900968867902419e-05,
"loss": 0.0379,
"step": 449
},
{
"epoch": 4.591836734693878,
"grad_norm": 0.6503361463546753,
"learning_rate": 1.9001947150040462e-05,
"loss": 0.0076,
"step": 450
},
{
"epoch": 4.6020408163265305,
"grad_norm": 0.1325170397758484,
"learning_rate": 1.8994177068899414e-05,
"loss": 0.0012,
"step": 451
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.0501476414501667,
"learning_rate": 1.8986378460246e-05,
"loss": 0.0006,
"step": 452
},
{
"epoch": 4.622448979591836,
"grad_norm": 83.80696868896484,
"learning_rate": 1.8978551348815653e-05,
"loss": 0.6476,
"step": 453
},
{
"epoch": 4.63265306122449,
"grad_norm": 0.006705591455101967,
"learning_rate": 1.897069575943422e-05,
"loss": 0.0,
"step": 454
},
{
"epoch": 4.642857142857143,
"grad_norm": 4.572281837463379,
"learning_rate": 1.896281171701787e-05,
"loss": 0.0214,
"step": 455
},
{
"epoch": 4.653061224489796,
"grad_norm": 0.03425569087266922,
"learning_rate": 1.895489924657301e-05,
"loss": 0.0005,
"step": 456
},
{
"epoch": 4.663265306122449,
"grad_norm": 219.26651000976562,
"learning_rate": 1.894695837319623e-05,
"loss": 4.8527,
"step": 457
},
{
"epoch": 4.673469387755102,
"grad_norm": 39.20528793334961,
"learning_rate": 1.8938989122074195e-05,
"loss": 0.4774,
"step": 458
},
{
"epoch": 4.683673469387755,
"grad_norm": 0.02694033645093441,
"learning_rate": 1.8930991518483586e-05,
"loss": 0.0003,
"step": 459
},
{
"epoch": 4.6938775510204085,
"grad_norm": 0.008928623050451279,
"learning_rate": 1.8922965587791e-05,
"loss": 0.0001,
"step": 460
},
{
"epoch": 4.704081632653061,
"grad_norm": 0.7024058103561401,
"learning_rate": 1.8914911355452895e-05,
"loss": 0.0075,
"step": 461
},
{
"epoch": 4.714285714285714,
"grad_norm": 0.0062184385024011135,
"learning_rate": 1.890682884701549e-05,
"loss": 0.0001,
"step": 462
},
{
"epoch": 4.724489795918368,
"grad_norm": 265.30419921875,
"learning_rate": 1.8898718088114688e-05,
"loss": 7.4959,
"step": 463
},
{
"epoch": 4.73469387755102,
"grad_norm": 0.0009059436270035803,
"learning_rate": 1.8890579104475996e-05,
"loss": 0.0,
"step": 464
},
{
"epoch": 4.744897959183674,
"grad_norm": 58.41782760620117,
"learning_rate": 1.8882411921914442e-05,
"loss": 2.1102,
"step": 465
},
{
"epoch": 4.755102040816326,
"grad_norm": 0.17638947069644928,
"learning_rate": 1.8874216566334502e-05,
"loss": 0.0027,
"step": 466
},
{
"epoch": 4.76530612244898,
"grad_norm": 0.30780017375946045,
"learning_rate": 1.8865993063730003e-05,
"loss": 0.0035,
"step": 467
},
{
"epoch": 4.775510204081632,
"grad_norm": 41.008460998535156,
"learning_rate": 1.885774144018405e-05,
"loss": 0.574,
"step": 468
},
{
"epoch": 4.785714285714286,
"grad_norm": 1.381316900253296,
"learning_rate": 1.8849461721868948e-05,
"loss": 0.0191,
"step": 469
},
{
"epoch": 4.795918367346939,
"grad_norm": 2.717698574066162,
"learning_rate": 1.8841153935046098e-05,
"loss": 0.0214,
"step": 470
},
{
"epoch": 4.8061224489795915,
"grad_norm": 0.2837541401386261,
"learning_rate": 1.8832818106065943e-05,
"loss": 0.0016,
"step": 471
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.03325734660029411,
"learning_rate": 1.8824454261367862e-05,
"loss": 0.0003,
"step": 472
},
{
"epoch": 4.826530612244898,
"grad_norm": 0.0347367525100708,
"learning_rate": 1.881606242748009e-05,
"loss": 0.0003,
"step": 473
},
{
"epoch": 4.836734693877551,
"grad_norm": 0.7267290353775024,
"learning_rate": 1.8807642631019648e-05,
"loss": 0.0038,
"step": 474
},
{
"epoch": 4.846938775510204,
"grad_norm": 0.002910307375714183,
"learning_rate": 1.8799194898692238e-05,
"loss": 0.0,
"step": 475
},
{
"epoch": 4.857142857142857,
"grad_norm": 49.9831428527832,
"learning_rate": 1.8790719257292175e-05,
"loss": 0.4292,
"step": 476
},
{
"epoch": 4.86734693877551,
"grad_norm": 0.09869600087404251,
"learning_rate": 1.8782215733702286e-05,
"loss": 0.0009,
"step": 477
},
{
"epoch": 4.877551020408164,
"grad_norm": 4.552360534667969,
"learning_rate": 1.8773684354893848e-05,
"loss": 0.041,
"step": 478
},
{
"epoch": 4.887755102040816,
"grad_norm": 8.773714065551758,
"learning_rate": 1.8765125147926477e-05,
"loss": 0.0909,
"step": 479
},
{
"epoch": 4.8979591836734695,
"grad_norm": 0.24354512989521027,
"learning_rate": 1.875653813994806e-05,
"loss": 0.0024,
"step": 480
},
{
"epoch": 4.908163265306122,
"grad_norm": 0.005503151565790176,
"learning_rate": 1.874792335819466e-05,
"loss": 0.0001,
"step": 481
},
{
"epoch": 4.918367346938775,
"grad_norm": 150.60595703125,
"learning_rate": 1.873928082999043e-05,
"loss": 0.3607,
"step": 482
},
{
"epoch": 4.928571428571429,
"grad_norm": 44.48455047607422,
"learning_rate": 1.8730610582747538e-05,
"loss": 0.994,
"step": 483
},
{
"epoch": 4.938775510204081,
"grad_norm": 1.845575213432312,
"learning_rate": 1.8721912643966055e-05,
"loss": 0.0186,
"step": 484
},
{
"epoch": 4.948979591836735,
"grad_norm": 12.332234382629395,
"learning_rate": 1.8713187041233896e-05,
"loss": 0.206,
"step": 485
},
{
"epoch": 4.959183673469388,
"grad_norm": 0.08337131142616272,
"learning_rate": 1.8704433802226714e-05,
"loss": 0.0008,
"step": 486
},
{
"epoch": 4.969387755102041,
"grad_norm": 0.053039923310279846,
"learning_rate": 1.8695652954707823e-05,
"loss": 0.0006,
"step": 487
},
{
"epoch": 4.979591836734694,
"grad_norm": 22.981956481933594,
"learning_rate": 1.86868445265281e-05,
"loss": 0.2176,
"step": 488
},
{
"epoch": 4.989795918367347,
"grad_norm": 27.581205368041992,
"learning_rate": 1.86780085456259e-05,
"loss": 0.2219,
"step": 489
},
{
"epoch": 5.0,
"grad_norm": 1.5270394086837769,
"learning_rate": 1.866914504002698e-05,
"loss": 0.0112,
"step": 490
},
{
"epoch": 5.0,
"eval_dim_128_cosine_accuracy@1": 0.35979513444302175,
"eval_dim_128_cosine_accuracy@10": 0.4238156209987196,
"eval_dim_128_cosine_accuracy@3": 0.3687580025608195,
"eval_dim_128_cosine_accuracy@5": 0.39180537772087065,
"eval_dim_128_cosine_map@100": 0.437074453724746,
"eval_dim_128_cosine_mrr@10": 0.37126293112208564,
"eval_dim_128_cosine_ndcg@10": 0.38254301379946687,
"eval_dim_128_cosine_precision@1": 0.35979513444302175,
"eval_dim_128_cosine_precision@10": 0.32528809218950067,
"eval_dim_128_cosine_precision@3": 0.36022193768672645,
"eval_dim_128_cosine_precision@5": 0.353393085787452,
"eval_dim_128_cosine_recall@1": 0.037743801580456406,
"eval_dim_128_cosine_recall@10": 0.2526151629138788,
"eval_dim_128_cosine_recall@3": 0.11036730376509347,
"eval_dim_128_cosine_recall@5": 0.16748863189789,
"eval_dim_256_cosine_accuracy@1": 0.4001280409731114,
"eval_dim_256_cosine_accuracy@10": 0.4532650448143406,
"eval_dim_256_cosine_accuracy@3": 0.4039692701664533,
"eval_dim_256_cosine_accuracy@5": 0.42509603072983354,
"eval_dim_256_cosine_map@100": 0.46849072213833953,
"eval_dim_256_cosine_mrr@10": 0.4091183464422898,
"eval_dim_256_cosine_ndcg@10": 0.4189536617481397,
"eval_dim_256_cosine_precision@1": 0.4001280409731114,
"eval_dim_256_cosine_precision@10": 0.35627400768245837,
"eval_dim_256_cosine_precision@3": 0.3990610328638497,
"eval_dim_256_cosine_precision@5": 0.3896286811779769,
"eval_dim_256_cosine_recall@1": 0.040728817488916956,
"eval_dim_256_cosine_recall@10": 0.27123568020124944,
"eval_dim_256_cosine_recall@3": 0.11918215775911108,
"eval_dim_256_cosine_recall@5": 0.1806338192919952,
"eval_dim_512_cosine_accuracy@1": 0.40973111395646605,
"eval_dim_512_cosine_accuracy@10": 0.46350832266325226,
"eval_dim_512_cosine_accuracy@3": 0.41293213828425096,
"eval_dim_512_cosine_accuracy@5": 0.43918053777208704,
"eval_dim_512_cosine_map@100": 0.4812812064576608,
"eval_dim_512_cosine_mrr@10": 0.4190039225250491,
"eval_dim_512_cosine_ndcg@10": 0.42914983827992026,
"eval_dim_512_cosine_precision@1": 0.40973111395646605,
"eval_dim_512_cosine_precision@10": 0.3639564660691421,
"eval_dim_512_cosine_precision@3": 0.4080239009816474,
"eval_dim_512_cosine_precision@5": 0.3991037131882202,
"eval_dim_512_cosine_recall@1": 0.04220302608677807,
"eval_dim_512_cosine_recall@10": 0.2804441921081484,
"eval_dim_512_cosine_recall@3": 0.1232911911396302,
"eval_dim_512_cosine_recall@5": 0.18736557372627924,
"eval_dim_64_cosine_accuracy@1": 0.31498079385403327,
"eval_dim_64_cosine_accuracy@10": 0.3732394366197183,
"eval_dim_64_cosine_accuracy@3": 0.31882202304737517,
"eval_dim_64_cosine_accuracy@5": 0.34314980793854033,
"eval_dim_64_cosine_map@100": 0.3879101681453684,
"eval_dim_64_cosine_mrr@10": 0.32475204763937116,
"eval_dim_64_cosine_ndcg@10": 0.3352059384790271,
"eval_dim_64_cosine_precision@1": 0.31498079385403327,
"eval_dim_64_cosine_precision@10": 0.2811779769526248,
"eval_dim_64_cosine_precision@3": 0.3147673922321809,
"eval_dim_64_cosine_precision@5": 0.30742637644046095,
"eval_dim_64_cosine_recall@1": 0.03382134135767763,
"eval_dim_64_cosine_recall@10": 0.22782511684449083,
"eval_dim_64_cosine_recall@3": 0.09979477196363994,
"eval_dim_64_cosine_recall@5": 0.1517937835977082,
"eval_dim_768_cosine_accuracy@1": 0.41613316261203587,
"eval_dim_768_cosine_accuracy@10": 0.4731113956466069,
"eval_dim_768_cosine_accuracy@3": 0.4193341869398207,
"eval_dim_768_cosine_accuracy@5": 0.44366197183098594,
"eval_dim_768_cosine_map@100": 0.4890243769832116,
"eval_dim_768_cosine_mrr@10": 0.4256112432168768,
"eval_dim_768_cosine_ndcg@10": 0.43491019147566995,
"eval_dim_768_cosine_precision@1": 0.41613316261203587,
"eval_dim_768_cosine_precision@10": 0.36984635083226636,
"eval_dim_768_cosine_precision@3": 0.4148527528809219,
"eval_dim_768_cosine_precision@5": 0.4051216389244558,
"eval_dim_768_cosine_recall@1": 0.04254250894522427,
"eval_dim_768_cosine_recall@10": 0.28288145399591047,
"eval_dim_768_cosine_recall@3": 0.1244727395259559,
"eval_dim_768_cosine_recall@5": 0.1886213008910836,
"eval_runtime": 183.201,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3352059384790271,
"eval_steps_per_second": 0.0,
"step": 490
},
{
"epoch": 5.010204081632653,
"grad_norm": 0.05376085638999939,
"learning_rate": 1.866025403784439e-05,
"loss": 0.0005,
"step": 491
},
{
"epoch": 5.020408163265306,
"grad_norm": 0.16362005472183228,
"learning_rate": 1.865133556727839e-05,
"loss": 0.0016,
"step": 492
},
{
"epoch": 5.030612244897959,
"grad_norm": 0.9914959669113159,
"learning_rate": 1.864238965661637e-05,
"loss": 0.0091,
"step": 493
},
{
"epoch": 5.040816326530612,
"grad_norm": 30.57797622680664,
"learning_rate": 1.8633416334232754e-05,
"loss": 0.0467,
"step": 494
},
{
"epoch": 5.051020408163265,
"grad_norm": 2.5222413539886475,
"learning_rate": 1.862441562858891e-05,
"loss": 0.0229,
"step": 495
},
{
"epoch": 5.061224489795919,
"grad_norm": 0.0006655550096184015,
"learning_rate": 1.861538756823305e-05,
"loss": 0.0,
"step": 496
},
{
"epoch": 5.071428571428571,
"grad_norm": 0.11231075972318649,
"learning_rate": 1.8606332181800165e-05,
"loss": 0.0014,
"step": 497
},
{
"epoch": 5.081632653061225,
"grad_norm": 0.2929253876209259,
"learning_rate": 1.8597249498011906e-05,
"loss": 0.0045,
"step": 498
},
{
"epoch": 5.091836734693878,
"grad_norm": 0.013004861772060394,
"learning_rate": 1.8588139545676506e-05,
"loss": 0.0002,
"step": 499
},
{
"epoch": 5.1020408163265305,
"grad_norm": 10.929536819458008,
"learning_rate": 1.8579002353688695e-05,
"loss": 0.105,
"step": 500
},
{
"epoch": 5.112244897959184,
"grad_norm": 0.0029807123355567455,
"learning_rate": 1.8569837951029597e-05,
"loss": 0.0,
"step": 501
},
{
"epoch": 5.122448979591836,
"grad_norm": 0.6099005937576294,
"learning_rate": 1.8560646366766637e-05,
"loss": 0.0063,
"step": 502
},
{
"epoch": 5.13265306122449,
"grad_norm": 2.8765928745269775,
"learning_rate": 1.8551427630053464e-05,
"loss": 0.0242,
"step": 503
},
{
"epoch": 5.142857142857143,
"grad_norm": 0.00245065544731915,
"learning_rate": 1.8542181770129838e-05,
"loss": 0.0,
"step": 504
},
{
"epoch": 5.153061224489796,
"grad_norm": 0.3614043891429901,
"learning_rate": 1.8532908816321557e-05,
"loss": 0.0033,
"step": 505
},
{
"epoch": 5.163265306122449,
"grad_norm": 0.04071643948554993,
"learning_rate": 1.852360879804035e-05,
"loss": 0.0004,
"step": 506
},
{
"epoch": 5.173469387755102,
"grad_norm": 0.2372654378414154,
"learning_rate": 1.851428174478379e-05,
"loss": 0.0014,
"step": 507
},
{
"epoch": 5.183673469387755,
"grad_norm": 0.3527073264122009,
"learning_rate": 1.8504927686135194e-05,
"loss": 0.0027,
"step": 508
},
{
"epoch": 5.1938775510204085,
"grad_norm": 198.38519287109375,
"learning_rate": 1.849554665176354e-05,
"loss": 2.3163,
"step": 509
},
{
"epoch": 5.204081632653061,
"grad_norm": 20.711875915527344,
"learning_rate": 1.8486138671423366e-05,
"loss": 0.5547,
"step": 510
},
{
"epoch": 5.214285714285714,
"grad_norm": 3.5171825885772705,
"learning_rate": 1.8476703774954676e-05,
"loss": 0.0802,
"step": 511
},
{
"epoch": 5.224489795918367,
"grad_norm": 0.10855702310800552,
"learning_rate": 1.8467241992282842e-05,
"loss": 0.0011,
"step": 512
},
{
"epoch": 5.23469387755102,
"grad_norm": 0.008565492928028107,
"learning_rate": 1.845775335341852e-05,
"loss": 0.0001,
"step": 513
},
{
"epoch": 5.244897959183674,
"grad_norm": 4.928635597229004,
"learning_rate": 1.8448237888457546e-05,
"loss": 0.0109,
"step": 514
},
{
"epoch": 5.255102040816326,
"grad_norm": 0.488430380821228,
"learning_rate": 1.8438695627580832e-05,
"loss": 0.0044,
"step": 515
},
{
"epoch": 5.26530612244898,
"grad_norm": 0.40208637714385986,
"learning_rate": 1.8429126601054302e-05,
"loss": 0.0036,
"step": 516
},
{
"epoch": 5.275510204081632,
"grad_norm": 0.18485486507415771,
"learning_rate": 1.841953083922875e-05,
"loss": 0.0018,
"step": 517
},
{
"epoch": 5.285714285714286,
"grad_norm": 0.9628912210464478,
"learning_rate": 1.8409908372539788e-05,
"loss": 0.0073,
"step": 518
},
{
"epoch": 5.295918367346939,
"grad_norm": 0.21243244409561157,
"learning_rate": 1.8400259231507716e-05,
"loss": 0.0025,
"step": 519
},
{
"epoch": 5.3061224489795915,
"grad_norm": 0.005211708135902882,
"learning_rate": 1.8390583446737448e-05,
"loss": 0.0001,
"step": 520
},
{
"epoch": 5.316326530612245,
"grad_norm": 0.21359172463417053,
"learning_rate": 1.8380881048918406e-05,
"loss": 0.0031,
"step": 521
},
{
"epoch": 5.326530612244898,
"grad_norm": 64.16328430175781,
"learning_rate": 1.837115206882442e-05,
"loss": 0.1512,
"step": 522
},
{
"epoch": 5.336734693877551,
"grad_norm": 0.006268950179219246,
"learning_rate": 1.8361396537313628e-05,
"loss": 0.0001,
"step": 523
},
{
"epoch": 5.346938775510204,
"grad_norm": 1.6587684154510498,
"learning_rate": 1.835161448532839e-05,
"loss": 0.0169,
"step": 524
},
{
"epoch": 5.357142857142857,
"grad_norm": 0.22815948724746704,
"learning_rate": 1.8341805943895178e-05,
"loss": 0.0021,
"step": 525
},
{
"epoch": 5.36734693877551,
"grad_norm": 1.3137353658676147,
"learning_rate": 1.833197094412449e-05,
"loss": 0.0088,
"step": 526
},
{
"epoch": 5.377551020408164,
"grad_norm": 0.0314713716506958,
"learning_rate": 1.832210951721074e-05,
"loss": 0.0003,
"step": 527
},
{
"epoch": 5.387755102040816,
"grad_norm": 2.1728274822235107,
"learning_rate": 1.831222169443216e-05,
"loss": 0.0308,
"step": 528
},
{
"epoch": 5.3979591836734695,
"grad_norm": 0.0005056152585893869,
"learning_rate": 1.8302307507150703e-05,
"loss": 0.0,
"step": 529
},
{
"epoch": 5.408163265306122,
"grad_norm": 40.51821517944336,
"learning_rate": 1.8292366986811952e-05,
"loss": 0.3433,
"step": 530
},
{
"epoch": 5.418367346938775,
"grad_norm": 0.018755732104182243,
"learning_rate": 1.8282400164945006e-05,
"loss": 0.0003,
"step": 531
},
{
"epoch": 5.428571428571429,
"grad_norm": 0.2546168267726898,
"learning_rate": 1.8272407073162393e-05,
"loss": 0.0036,
"step": 532
},
{
"epoch": 5.438775510204081,
"grad_norm": 0.12199478596448898,
"learning_rate": 1.826238774315995e-05,
"loss": 0.0008,
"step": 533
},
{
"epoch": 5.448979591836735,
"grad_norm": 0.6051994562149048,
"learning_rate": 1.8252342206716754e-05,
"loss": 0.0056,
"step": 534
},
{
"epoch": 5.459183673469388,
"grad_norm": 0.16368649899959564,
"learning_rate": 1.8242270495694985e-05,
"loss": 0.0028,
"step": 535
},
{
"epoch": 5.469387755102041,
"grad_norm": 0.09096916019916534,
"learning_rate": 1.8232172642039856e-05,
"loss": 0.0009,
"step": 536
},
{
"epoch": 5.479591836734694,
"grad_norm": 0.13005401194095612,
"learning_rate": 1.8222048677779495e-05,
"loss": 0.0015,
"step": 537
},
{
"epoch": 5.489795918367347,
"grad_norm": 0.4791003465652466,
"learning_rate": 1.821189863502484e-05,
"loss": 0.0023,
"step": 538
},
{
"epoch": 5.5,
"grad_norm": 0.07315538823604584,
"learning_rate": 1.820172254596956e-05,
"loss": 0.0007,
"step": 539
},
{
"epoch": 5.510204081632653,
"grad_norm": 0.006543061695992947,
"learning_rate": 1.819152044288992e-05,
"loss": 0.0001,
"step": 540
},
{
"epoch": 5.520408163265306,
"grad_norm": 1.838545799255371,
"learning_rate": 1.8181292358144703e-05,
"loss": 0.0231,
"step": 541
},
{
"epoch": 5.530612244897959,
"grad_norm": 18.193073272705078,
"learning_rate": 1.81710383241751e-05,
"loss": 0.1314,
"step": 542
},
{
"epoch": 5.540816326530612,
"grad_norm": 145.21258544921875,
"learning_rate": 1.816075837350461e-05,
"loss": 4.2928,
"step": 543
},
{
"epoch": 5.551020408163265,
"grad_norm": 2.3930890560150146,
"learning_rate": 1.815045253873893e-05,
"loss": 0.0168,
"step": 544
},
{
"epoch": 5.561224489795919,
"grad_norm": 0.044057078659534454,
"learning_rate": 1.814012085256585e-05,
"loss": 0.0002,
"step": 545
},
{
"epoch": 5.571428571428571,
"grad_norm": 0.026266353204846382,
"learning_rate": 1.812976334775517e-05,
"loss": 0.0003,
"step": 546
},
{
"epoch": 5.581632653061225,
"grad_norm": 0.6757158041000366,
"learning_rate": 1.811938005715857e-05,
"loss": 0.0051,
"step": 547
},
{
"epoch": 5.591836734693878,
"grad_norm": 0.006156954448670149,
"learning_rate": 1.8108971013709512e-05,
"loss": 0.0001,
"step": 548
},
{
"epoch": 5.6020408163265305,
"grad_norm": 0.4917071461677551,
"learning_rate": 1.8098536250423154e-05,
"loss": 0.003,
"step": 549
},
{
"epoch": 5.612244897959184,
"grad_norm": 0.3305758237838745,
"learning_rate": 1.8088075800396227e-05,
"loss": 0.0037,
"step": 550
},
{
"epoch": 5.622448979591836,
"grad_norm": 0.5282797813415527,
"learning_rate": 1.8077589696806925e-05,
"loss": 0.0047,
"step": 551
},
{
"epoch": 5.63265306122449,
"grad_norm": 0.46409568190574646,
"learning_rate": 1.8067077972914822e-05,
"loss": 0.0042,
"step": 552
},
{
"epoch": 5.642857142857143,
"grad_norm": 0.10639617592096329,
"learning_rate": 1.8056540662060747e-05,
"loss": 0.0011,
"step": 553
},
{
"epoch": 5.653061224489796,
"grad_norm": 0.05673323944211006,
"learning_rate": 1.8045977797666685e-05,
"loss": 0.0007,
"step": 554
},
{
"epoch": 5.663265306122449,
"grad_norm": 0.6235435009002686,
"learning_rate": 1.8035389413235672e-05,
"loss": 0.0036,
"step": 555
},
{
"epoch": 5.673469387755102,
"grad_norm": 5.090694427490234,
"learning_rate": 1.8024775542351695e-05,
"loss": 0.0572,
"step": 556
},
{
"epoch": 5.683673469387755,
"grad_norm": 42.43338394165039,
"learning_rate": 1.8014136218679566e-05,
"loss": 0.4782,
"step": 557
},
{
"epoch": 5.6938775510204085,
"grad_norm": 0.4403914511203766,
"learning_rate": 1.8003471475964837e-05,
"loss": 0.0033,
"step": 558
},
{
"epoch": 5.704081632653061,
"grad_norm": 3.306797742843628,
"learning_rate": 1.7992781348033678e-05,
"loss": 0.0453,
"step": 559
},
{
"epoch": 5.714285714285714,
"grad_norm": 0.06393012404441833,
"learning_rate": 1.7982065868792772e-05,
"loss": 0.0006,
"step": 560
},
{
"epoch": 5.724489795918368,
"grad_norm": 0.027197500690817833,
"learning_rate": 1.7971325072229227e-05,
"loss": 0.0003,
"step": 561
},
{
"epoch": 5.73469387755102,
"grad_norm": 0.1507413387298584,
"learning_rate": 1.7960558992410432e-05,
"loss": 0.0018,
"step": 562
},
{
"epoch": 5.744897959183674,
"grad_norm": 5.787868022918701,
"learning_rate": 1.794976766348398e-05,
"loss": 0.0589,
"step": 563
},
{
"epoch": 5.755102040816326,
"grad_norm": 0.0066904472187161446,
"learning_rate": 1.7938951119677544e-05,
"loss": 0.0001,
"step": 564
},
{
"epoch": 5.76530612244898,
"grad_norm": 0.11523879319429398,
"learning_rate": 1.7928109395298777e-05,
"loss": 0.0013,
"step": 565
},
{
"epoch": 5.775510204081632,
"grad_norm": 0.004689464345574379,
"learning_rate": 1.79172425247352e-05,
"loss": 0.0001,
"step": 566
},
{
"epoch": 5.785714285714286,
"grad_norm": 0.09963806718587875,
"learning_rate": 1.7906350542454084e-05,
"loss": 0.0011,
"step": 567
},
{
"epoch": 5.795918367346939,
"grad_norm": 0.3132230341434479,
"learning_rate": 1.7895433483002356e-05,
"loss": 0.0019,
"step": 568
},
{
"epoch": 5.8061224489795915,
"grad_norm": 0.6437335014343262,
"learning_rate": 1.788449138100648e-05,
"loss": 0.0055,
"step": 569
},
{
"epoch": 5.816326530612245,
"grad_norm": 32.23442077636719,
"learning_rate": 1.787352427117235e-05,
"loss": 0.6808,
"step": 570
},
{
"epoch": 5.826530612244898,
"grad_norm": 0.12098479270935059,
"learning_rate": 1.7862532188285176e-05,
"loss": 0.0007,
"step": 571
},
{
"epoch": 5.836734693877551,
"grad_norm": 0.07133156061172485,
"learning_rate": 1.785151516720938e-05,
"loss": 0.0008,
"step": 572
},
{
"epoch": 5.846938775510204,
"grad_norm": 35.33832550048828,
"learning_rate": 1.7840473242888486e-05,
"loss": 0.7029,
"step": 573
},
{
"epoch": 5.857142857142857,
"grad_norm": 0.22042210400104523,
"learning_rate": 1.7829406450344998e-05,
"loss": 0.003,
"step": 574
},
{
"epoch": 5.86734693877551,
"grad_norm": 0.07045172154903412,
"learning_rate": 1.78183148246803e-05,
"loss": 0.0008,
"step": 575
},
{
"epoch": 5.877551020408164,
"grad_norm": 0.009592265821993351,
"learning_rate": 1.780719840107454e-05,
"loss": 0.0001,
"step": 576
},
{
"epoch": 5.887755102040816,
"grad_norm": 126.49264526367188,
"learning_rate": 1.779605721478652e-05,
"loss": 3.5868,
"step": 577
},
{
"epoch": 5.8979591836734695,
"grad_norm": 0.1849050521850586,
"learning_rate": 1.778489130115359e-05,
"loss": 0.0019,
"step": 578
},
{
"epoch": 5.908163265306122,
"grad_norm": 0.200727641582489,
"learning_rate": 1.777370069559152e-05,
"loss": 0.0023,
"step": 579
},
{
"epoch": 5.918367346938775,
"grad_norm": 4.7665019035339355,
"learning_rate": 1.7762485433594398e-05,
"loss": 0.0625,
"step": 580
},
{
"epoch": 5.928571428571429,
"grad_norm": 18.486141204833984,
"learning_rate": 1.775124555073452e-05,
"loss": 0.1886,
"step": 581
},
{
"epoch": 5.938775510204081,
"grad_norm": 2.461655855178833,
"learning_rate": 1.7739981082662275e-05,
"loss": 0.0253,
"step": 582
},
{
"epoch": 5.948979591836735,
"grad_norm": 77.3629379272461,
"learning_rate": 1.7728692065106032e-05,
"loss": 0.6732,
"step": 583
},
{
"epoch": 5.959183673469388,
"grad_norm": 0.00850403681397438,
"learning_rate": 1.771737853387202e-05,
"loss": 0.0001,
"step": 584
},
{
"epoch": 5.969387755102041,
"grad_norm": 28.388769149780273,
"learning_rate": 1.7706040524844222e-05,
"loss": 0.239,
"step": 585
},
{
"epoch": 5.979591836734694,
"grad_norm": 218.7047119140625,
"learning_rate": 1.769467807398426e-05,
"loss": 5.5812,
"step": 586
},
{
"epoch": 5.989795918367347,
"grad_norm": 0.9289079904556274,
"learning_rate": 1.768329121733128e-05,
"loss": 0.0129,
"step": 587
},
{
"epoch": 6.0,
"grad_norm": 0.0184466615319252,
"learning_rate": 1.7671879991001838e-05,
"loss": 0.0002,
"step": 588
},
{
"epoch": 6.0,
"eval_dim_128_cosine_accuracy@1": 0.3495518565941101,
"eval_dim_128_cosine_accuracy@10": 0.4154929577464789,
"eval_dim_128_cosine_accuracy@3": 0.353393085787452,
"eval_dim_128_cosine_accuracy@5": 0.38028169014084506,
"eval_dim_128_cosine_map@100": 0.4276170602366832,
"eval_dim_128_cosine_mrr@10": 0.3604691278987048,
"eval_dim_128_cosine_ndcg@10": 0.37104719123202995,
"eval_dim_128_cosine_precision@1": 0.3495518565941101,
"eval_dim_128_cosine_precision@10": 0.312291933418694,
"eval_dim_128_cosine_precision@3": 0.34848484848484845,
"eval_dim_128_cosine_precision@5": 0.339820742637644,
"eval_dim_128_cosine_recall@1": 0.037856544549247154,
"eval_dim_128_cosine_recall@10": 0.25324669198316696,
"eval_dim_128_cosine_recall@3": 0.11129608559954554,
"eval_dim_128_cosine_recall@5": 0.1684035717787531,
"eval_dim_256_cosine_accuracy@1": 0.38092189500640206,
"eval_dim_256_cosine_accuracy@10": 0.44366197183098594,
"eval_dim_256_cosine_accuracy@3": 0.38412291933418696,
"eval_dim_256_cosine_accuracy@5": 0.40973111395646605,
"eval_dim_256_cosine_map@100": 0.4594474328308739,
"eval_dim_256_cosine_mrr@10": 0.3912121720220308,
"eval_dim_256_cosine_ndcg@10": 0.4022720775585408,
"eval_dim_256_cosine_precision@1": 0.38092189500640206,
"eval_dim_256_cosine_precision@10": 0.3419974391805378,
"eval_dim_256_cosine_precision@3": 0.3800682885189927,
"eval_dim_256_cosine_precision@5": 0.3714468629961588,
"eval_dim_256_cosine_recall@1": 0.03938517779616356,
"eval_dim_256_cosine_recall@10": 0.2686379160273794,
"eval_dim_256_cosine_recall@3": 0.115945325123842,
"eval_dim_256_cosine_recall@5": 0.1763856331416056,
"eval_dim_512_cosine_accuracy@1": 0.39820742637644047,
"eval_dim_512_cosine_accuracy@10": 0.45902688860435337,
"eval_dim_512_cosine_accuracy@3": 0.4026888604353393,
"eval_dim_512_cosine_accuracy@5": 0.42701664532650446,
"eval_dim_512_cosine_map@100": 0.4748374115934728,
"eval_dim_512_cosine_mrr@10": 0.4087001808832792,
"eval_dim_512_cosine_ndcg@10": 0.41985375125260577,
"eval_dim_512_cosine_precision@1": 0.39820742637644047,
"eval_dim_512_cosine_precision@10": 0.356978233034571,
"eval_dim_512_cosine_precision@3": 0.39820742637644047,
"eval_dim_512_cosine_precision@5": 0.3892445582586428,
"eval_dim_512_cosine_recall@1": 0.04102662618120145,
"eval_dim_512_cosine_recall@10": 0.27956498455762785,
"eval_dim_512_cosine_recall@3": 0.12062294908153026,
"eval_dim_512_cosine_recall@5": 0.18402636375152,
"eval_dim_64_cosine_accuracy@1": 0.3002560819462228,
"eval_dim_64_cosine_accuracy@10": 0.3649167733674776,
"eval_dim_64_cosine_accuracy@3": 0.3072983354673495,
"eval_dim_64_cosine_accuracy@5": 0.33034571062740076,
"eval_dim_64_cosine_map@100": 0.37592384285873587,
"eval_dim_64_cosine_mrr@10": 0.31124626547161705,
"eval_dim_64_cosine_ndcg@10": 0.32194373763795797,
"eval_dim_64_cosine_precision@1": 0.3002560819462228,
"eval_dim_64_cosine_precision@10": 0.2714468629961588,
"eval_dim_64_cosine_precision@3": 0.30110968843363206,
"eval_dim_64_cosine_precision@5": 0.29475032010243274,
"eval_dim_64_cosine_recall@1": 0.03258312564919841,
"eval_dim_64_cosine_recall@10": 0.21983024392840253,
"eval_dim_64_cosine_recall@3": 0.09635373620336293,
"eval_dim_64_cosine_recall@5": 0.14603365016280198,
"eval_dim_768_cosine_accuracy@1": 0.4058898847631242,
"eval_dim_768_cosine_accuracy@10": 0.471190781049936,
"eval_dim_768_cosine_accuracy@3": 0.41037131882202305,
"eval_dim_768_cosine_accuracy@5": 0.4385403329065301,
"eval_dim_768_cosine_map@100": 0.48482154237960223,
"eval_dim_768_cosine_mrr@10": 0.4170203036400217,
"eval_dim_768_cosine_ndcg@10": 0.4292262848394862,
"eval_dim_768_cosine_precision@1": 0.4058898847631242,
"eval_dim_768_cosine_precision@10": 0.36651728553137003,
"eval_dim_768_cosine_precision@3": 0.4050362782757149,
"eval_dim_768_cosine_precision@5": 0.39705505761843796,
"eval_dim_768_cosine_recall@1": 0.04172967581938629,
"eval_dim_768_cosine_recall@10": 0.2836218270585116,
"eval_dim_768_cosine_recall@3": 0.12212076683897896,
"eval_dim_768_cosine_recall@5": 0.18584066050972378,
"eval_runtime": 183.4217,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.32194373763795797,
"eval_steps_per_second": 0.0,
"step": 588
}
],
"logging_steps": 1,
"max_steps": 1960,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}