IoannisKat1's picture
Add finetuned model
807a21f verified
{
"best_global_step": 98,
"best_metric": 0.323940756796795,
"best_model_checkpoint": "intfloat/multilingual-e5-large/checkpoint-98",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 196,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01020408163265306,
"grad_norm": 1059.8211669921875,
"learning_rate": 0.0,
"loss": 9.6954,
"step": 1
},
{
"epoch": 0.02040816326530612,
"grad_norm": 890.8715209960938,
"learning_rate": 1.0204081632653061e-07,
"loss": 11.5048,
"step": 2
},
{
"epoch": 0.030612244897959183,
"grad_norm": 264.5753173828125,
"learning_rate": 2.0408163265306121e-07,
"loss": 2.1575,
"step": 3
},
{
"epoch": 0.04081632653061224,
"grad_norm": 501.6875,
"learning_rate": 3.0612244897959183e-07,
"loss": 2.6843,
"step": 4
},
{
"epoch": 0.05102040816326531,
"grad_norm": 6.634378910064697,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.0364,
"step": 5
},
{
"epoch": 0.061224489795918366,
"grad_norm": 206.64352416992188,
"learning_rate": 5.102040816326531e-07,
"loss": 0.705,
"step": 6
},
{
"epoch": 0.07142857142857142,
"grad_norm": 383.0555114746094,
"learning_rate": 6.122448979591837e-07,
"loss": 1.9957,
"step": 7
},
{
"epoch": 0.08163265306122448,
"grad_norm": 212.91329956054688,
"learning_rate": 7.142857142857143e-07,
"loss": 0.9938,
"step": 8
},
{
"epoch": 0.09183673469387756,
"grad_norm": 49.36940383911133,
"learning_rate": 8.163265306122449e-07,
"loss": 0.3187,
"step": 9
},
{
"epoch": 0.10204081632653061,
"grad_norm": 51.765472412109375,
"learning_rate": 9.183673469387756e-07,
"loss": 0.1435,
"step": 10
},
{
"epoch": 0.11224489795918367,
"grad_norm": 13.23577880859375,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.0818,
"step": 11
},
{
"epoch": 0.12244897959183673,
"grad_norm": 240.9364776611328,
"learning_rate": 1.122448979591837e-06,
"loss": 0.6535,
"step": 12
},
{
"epoch": 0.1326530612244898,
"grad_norm": 117.47791290283203,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.3915,
"step": 13
},
{
"epoch": 0.14285714285714285,
"grad_norm": 202.8033447265625,
"learning_rate": 1.3265306122448982e-06,
"loss": 0.5493,
"step": 14
},
{
"epoch": 0.15306122448979592,
"grad_norm": 111.81350708007812,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.7231,
"step": 15
},
{
"epoch": 0.16326530612244897,
"grad_norm": 10.884031295776367,
"learning_rate": 1.5306122448979593e-06,
"loss": 0.0715,
"step": 16
},
{
"epoch": 0.17346938775510204,
"grad_norm": 862.8134155273438,
"learning_rate": 1.6326530612244897e-06,
"loss": 5.8663,
"step": 17
},
{
"epoch": 0.1836734693877551,
"grad_norm": 54.59718322753906,
"learning_rate": 1.7346938775510206e-06,
"loss": 0.2586,
"step": 18
},
{
"epoch": 0.19387755102040816,
"grad_norm": 140.5866241455078,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.9353,
"step": 19
},
{
"epoch": 0.20408163265306123,
"grad_norm": 479.35052490234375,
"learning_rate": 1.938775510204082e-06,
"loss": 2.5843,
"step": 20
},
{
"epoch": 0.21428571428571427,
"grad_norm": 388.5758361816406,
"learning_rate": 2.0408163265306125e-06,
"loss": 2.0583,
"step": 21
},
{
"epoch": 0.22448979591836735,
"grad_norm": 619.6573486328125,
"learning_rate": 2.1428571428571427e-06,
"loss": 6.9121,
"step": 22
},
{
"epoch": 0.23469387755102042,
"grad_norm": 120.95680236816406,
"learning_rate": 2.244897959183674e-06,
"loss": 1.0921,
"step": 23
},
{
"epoch": 0.24489795918367346,
"grad_norm": 861.7152709960938,
"learning_rate": 2.3469387755102044e-06,
"loss": 5.4863,
"step": 24
},
{
"epoch": 0.25510204081632654,
"grad_norm": 7.463388919830322,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.0549,
"step": 25
},
{
"epoch": 0.2653061224489796,
"grad_norm": 357.8828125,
"learning_rate": 2.5510204081632657e-06,
"loss": 2.345,
"step": 26
},
{
"epoch": 0.2755102040816326,
"grad_norm": 390.43023681640625,
"learning_rate": 2.6530612244897964e-06,
"loss": 4.264,
"step": 27
},
{
"epoch": 0.2857142857142857,
"grad_norm": 382.8008728027344,
"learning_rate": 2.7551020408163266e-06,
"loss": 2.4847,
"step": 28
},
{
"epoch": 0.29591836734693877,
"grad_norm": 159.14295959472656,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.7634,
"step": 29
},
{
"epoch": 0.30612244897959184,
"grad_norm": 361.00201416015625,
"learning_rate": 2.959183673469388e-06,
"loss": 2.047,
"step": 30
},
{
"epoch": 0.3163265306122449,
"grad_norm": 81.90242004394531,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.694,
"step": 31
},
{
"epoch": 0.32653061224489793,
"grad_norm": 294.4106750488281,
"learning_rate": 3.1632653061224496e-06,
"loss": 0.7417,
"step": 32
},
{
"epoch": 0.336734693877551,
"grad_norm": 322.22308349609375,
"learning_rate": 3.2653061224489794e-06,
"loss": 1.9942,
"step": 33
},
{
"epoch": 0.3469387755102041,
"grad_norm": 449.238525390625,
"learning_rate": 3.3673469387755105e-06,
"loss": 2.8978,
"step": 34
},
{
"epoch": 0.35714285714285715,
"grad_norm": 1.8455325365066528,
"learning_rate": 3.469387755102041e-06,
"loss": 0.0126,
"step": 35
},
{
"epoch": 0.3673469387755102,
"grad_norm": 310.52740478515625,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.9776,
"step": 36
},
{
"epoch": 0.37755102040816324,
"grad_norm": 237.73545837402344,
"learning_rate": 3.6734693877551024e-06,
"loss": 1.5667,
"step": 37
},
{
"epoch": 0.3877551020408163,
"grad_norm": 817.3215942382812,
"learning_rate": 3.7755102040816327e-06,
"loss": 5.5693,
"step": 38
},
{
"epoch": 0.3979591836734694,
"grad_norm": 364.91326904296875,
"learning_rate": 3.877551020408164e-06,
"loss": 1.6802,
"step": 39
},
{
"epoch": 0.40816326530612246,
"grad_norm": 34.629112243652344,
"learning_rate": 3.979591836734694e-06,
"loss": 0.2144,
"step": 40
},
{
"epoch": 0.41836734693877553,
"grad_norm": 43.43345260620117,
"learning_rate": 4.081632653061225e-06,
"loss": 0.1797,
"step": 41
},
{
"epoch": 0.42857142857142855,
"grad_norm": 544.13134765625,
"learning_rate": 4.183673469387755e-06,
"loss": 5.7559,
"step": 42
},
{
"epoch": 0.4387755102040816,
"grad_norm": 357.5466003417969,
"learning_rate": 4.2857142857142855e-06,
"loss": 2.6372,
"step": 43
},
{
"epoch": 0.4489795918367347,
"grad_norm": 458.7740783691406,
"learning_rate": 4.3877551020408165e-06,
"loss": 1.8447,
"step": 44
},
{
"epoch": 0.45918367346938777,
"grad_norm": 668.6949462890625,
"learning_rate": 4.489795918367348e-06,
"loss": 2.8156,
"step": 45
},
{
"epoch": 0.46938775510204084,
"grad_norm": 382.7512512207031,
"learning_rate": 4.591836734693878e-06,
"loss": 3.1588,
"step": 46
},
{
"epoch": 0.47959183673469385,
"grad_norm": 9.869824409484863,
"learning_rate": 4.693877551020409e-06,
"loss": 0.0552,
"step": 47
},
{
"epoch": 0.4897959183673469,
"grad_norm": 448.4170837402344,
"learning_rate": 4.795918367346939e-06,
"loss": 3.3053,
"step": 48
},
{
"epoch": 0.5,
"grad_norm": 471.82855224609375,
"learning_rate": 4.897959183673469e-06,
"loss": 2.8332,
"step": 49
},
{
"epoch": 0.5102040816326531,
"grad_norm": 210.9025115966797,
"learning_rate": 5e-06,
"loss": 1.1961,
"step": 50
},
{
"epoch": 0.5204081632653061,
"grad_norm": 164.64920043945312,
"learning_rate": 5.1020408163265315e-06,
"loss": 1.0106,
"step": 51
},
{
"epoch": 0.5306122448979592,
"grad_norm": 386.0244140625,
"learning_rate": 5.204081632653062e-06,
"loss": 2.4593,
"step": 52
},
{
"epoch": 0.5408163265306123,
"grad_norm": 419.1893310546875,
"learning_rate": 5.306122448979593e-06,
"loss": 3.4849,
"step": 53
},
{
"epoch": 0.5510204081632653,
"grad_norm": 10.212640762329102,
"learning_rate": 5.408163265306123e-06,
"loss": 0.0338,
"step": 54
},
{
"epoch": 0.5612244897959183,
"grad_norm": 419.79815673828125,
"learning_rate": 5.510204081632653e-06,
"loss": 1.5319,
"step": 55
},
{
"epoch": 0.5714285714285714,
"grad_norm": 6.56746768951416,
"learning_rate": 5.6122448979591834e-06,
"loss": 0.0419,
"step": 56
},
{
"epoch": 0.5816326530612245,
"grad_norm": 15.671188354492188,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.1098,
"step": 57
},
{
"epoch": 0.5918367346938775,
"grad_norm": 8.410639762878418,
"learning_rate": 5.816326530612246e-06,
"loss": 0.0457,
"step": 58
},
{
"epoch": 0.6020408163265306,
"grad_norm": 6.06464147567749,
"learning_rate": 5.918367346938776e-06,
"loss": 0.0273,
"step": 59
},
{
"epoch": 0.6122448979591837,
"grad_norm": 293.1927185058594,
"learning_rate": 6.020408163265307e-06,
"loss": 1.2946,
"step": 60
},
{
"epoch": 0.6224489795918368,
"grad_norm": 189.53306579589844,
"learning_rate": 6.122448979591837e-06,
"loss": 3.4121,
"step": 61
},
{
"epoch": 0.6326530612244898,
"grad_norm": 455.4539489746094,
"learning_rate": 6.224489795918368e-06,
"loss": 2.6015,
"step": 62
},
{
"epoch": 0.6428571428571429,
"grad_norm": 351.75830078125,
"learning_rate": 6.326530612244899e-06,
"loss": 2.0358,
"step": 63
},
{
"epoch": 0.6530612244897959,
"grad_norm": 1400.6083984375,
"learning_rate": 6.4285714285714295e-06,
"loss": 7.3114,
"step": 64
},
{
"epoch": 0.6632653061224489,
"grad_norm": 1247.4736328125,
"learning_rate": 6.530612244897959e-06,
"loss": 6.8888,
"step": 65
},
{
"epoch": 0.673469387755102,
"grad_norm": 296.157470703125,
"learning_rate": 6.63265306122449e-06,
"loss": 1.6606,
"step": 66
},
{
"epoch": 0.6836734693877551,
"grad_norm": 461.7646484375,
"learning_rate": 6.734693877551021e-06,
"loss": 5.2343,
"step": 67
},
{
"epoch": 0.6938775510204082,
"grad_norm": 443.15264892578125,
"learning_rate": 6.836734693877551e-06,
"loss": 2.1977,
"step": 68
},
{
"epoch": 0.7040816326530612,
"grad_norm": 31.333446502685547,
"learning_rate": 6.938775510204082e-06,
"loss": 0.1702,
"step": 69
},
{
"epoch": 0.7142857142857143,
"grad_norm": 603.3770751953125,
"learning_rate": 7.0408163265306125e-06,
"loss": 3.5715,
"step": 70
},
{
"epoch": 0.7244897959183674,
"grad_norm": 190.58395385742188,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.4736,
"step": 71
},
{
"epoch": 0.7346938775510204,
"grad_norm": 211.7954559326172,
"learning_rate": 7.244897959183675e-06,
"loss": 1.0967,
"step": 72
},
{
"epoch": 0.7448979591836735,
"grad_norm": 288.28448486328125,
"learning_rate": 7.346938775510205e-06,
"loss": 1.2098,
"step": 73
},
{
"epoch": 0.7551020408163265,
"grad_norm": 535.2803344726562,
"learning_rate": 7.448979591836736e-06,
"loss": 1.9541,
"step": 74
},
{
"epoch": 0.7653061224489796,
"grad_norm": 1270.0836181640625,
"learning_rate": 7.551020408163265e-06,
"loss": 4.0992,
"step": 75
},
{
"epoch": 0.7755102040816326,
"grad_norm": 2.131913661956787,
"learning_rate": 7.653061224489796e-06,
"loss": 0.0145,
"step": 76
},
{
"epoch": 0.7857142857142857,
"grad_norm": 2.6484782695770264,
"learning_rate": 7.755102040816327e-06,
"loss": 0.0079,
"step": 77
},
{
"epoch": 0.7959183673469388,
"grad_norm": 18.671253204345703,
"learning_rate": 7.857142857142858e-06,
"loss": 0.1081,
"step": 78
},
{
"epoch": 0.8061224489795918,
"grad_norm": 282.3451843261719,
"learning_rate": 7.959183673469388e-06,
"loss": 1.7446,
"step": 79
},
{
"epoch": 0.8163265306122449,
"grad_norm": 303.16900634765625,
"learning_rate": 8.06122448979592e-06,
"loss": 0.6343,
"step": 80
},
{
"epoch": 0.826530612244898,
"grad_norm": 899.4592895507812,
"learning_rate": 8.16326530612245e-06,
"loss": 4.7374,
"step": 81
},
{
"epoch": 0.8367346938775511,
"grad_norm": 600.3280639648438,
"learning_rate": 8.26530612244898e-06,
"loss": 3.1082,
"step": 82
},
{
"epoch": 0.8469387755102041,
"grad_norm": 3.1936967372894287,
"learning_rate": 8.36734693877551e-06,
"loss": 0.0144,
"step": 83
},
{
"epoch": 0.8571428571428571,
"grad_norm": 1.3846139907836914,
"learning_rate": 8.469387755102042e-06,
"loss": 0.0057,
"step": 84
},
{
"epoch": 0.8673469387755102,
"grad_norm": 197.3724822998047,
"learning_rate": 8.571428571428571e-06,
"loss": 0.7656,
"step": 85
},
{
"epoch": 0.8775510204081632,
"grad_norm": 545.4349365234375,
"learning_rate": 8.673469387755103e-06,
"loss": 1.5191,
"step": 86
},
{
"epoch": 0.8877551020408163,
"grad_norm": 121.33210754394531,
"learning_rate": 8.775510204081633e-06,
"loss": 0.1942,
"step": 87
},
{
"epoch": 0.8979591836734694,
"grad_norm": 48.77962112426758,
"learning_rate": 8.877551020408163e-06,
"loss": 0.2429,
"step": 88
},
{
"epoch": 0.9081632653061225,
"grad_norm": 664.6809692382812,
"learning_rate": 8.979591836734695e-06,
"loss": 7.0608,
"step": 89
},
{
"epoch": 0.9183673469387755,
"grad_norm": 67.93673706054688,
"learning_rate": 9.081632653061225e-06,
"loss": 0.1635,
"step": 90
},
{
"epoch": 0.9285714285714286,
"grad_norm": 14.213589668273926,
"learning_rate": 9.183673469387756e-06,
"loss": 0.057,
"step": 91
},
{
"epoch": 0.9387755102040817,
"grad_norm": 550.2100219726562,
"learning_rate": 9.285714285714288e-06,
"loss": 3.1796,
"step": 92
},
{
"epoch": 0.9489795918367347,
"grad_norm": 516.64794921875,
"learning_rate": 9.387755102040818e-06,
"loss": 2.4068,
"step": 93
},
{
"epoch": 0.9591836734693877,
"grad_norm": 227.85704040527344,
"learning_rate": 9.489795918367348e-06,
"loss": 0.9694,
"step": 94
},
{
"epoch": 0.9693877551020408,
"grad_norm": 103.72978973388672,
"learning_rate": 9.591836734693878e-06,
"loss": 0.4878,
"step": 95
},
{
"epoch": 0.9795918367346939,
"grad_norm": 113.07623291015625,
"learning_rate": 9.693877551020408e-06,
"loss": 0.4105,
"step": 96
},
{
"epoch": 0.9897959183673469,
"grad_norm": 1049.9190673828125,
"learning_rate": 9.795918367346939e-06,
"loss": 4.5006,
"step": 97
},
{
"epoch": 1.0,
"grad_norm": 419.75555419921875,
"learning_rate": 9.89795918367347e-06,
"loss": 2.2675,
"step": 98
},
{
"epoch": 1.0,
"eval_dim_1024_cosine_accuracy@1": 0.34571062740076824,
"eval_dim_1024_cosine_accuracy@10": 0.4231754161331626,
"eval_dim_1024_cosine_accuracy@3": 0.352112676056338,
"eval_dim_1024_cosine_accuracy@5": 0.3854033290653009,
"eval_dim_1024_cosine_map@100": 0.4413040417287197,
"eval_dim_1024_cosine_mrr@10": 0.3591188545413894,
"eval_dim_1024_cosine_ndcg@10": 0.37218117046458954,
"eval_dim_1024_cosine_precision@1": 0.34571062740076824,
"eval_dim_1024_cosine_precision@10": 0.3060179257362356,
"eval_dim_1024_cosine_precision@3": 0.34571062740076824,
"eval_dim_1024_cosine_precision@5": 0.33751600512163893,
"eval_dim_1024_cosine_recall@1": 0.04202665510348477,
"eval_dim_1024_cosine_recall@10": 0.2720738817855689,
"eval_dim_1024_cosine_recall@3": 0.12344281372964075,
"eval_dim_1024_cosine_recall@5": 0.1862277356935127,
"eval_dim_128_cosine_accuracy@1": 0.30089628681177977,
"eval_dim_128_cosine_accuracy@10": 0.3687580025608195,
"eval_dim_128_cosine_accuracy@3": 0.3060179257362356,
"eval_dim_128_cosine_accuracy@5": 0.33354673495518566,
"eval_dim_128_cosine_map@100": 0.383805456031232,
"eval_dim_128_cosine_mrr@10": 0.31238618580167843,
"eval_dim_128_cosine_ndcg@10": 0.323940756796795,
"eval_dim_128_cosine_precision@1": 0.30089628681177977,
"eval_dim_128_cosine_precision@10": 0.2669014084507042,
"eval_dim_128_cosine_precision@3": 0.3006828851899274,
"eval_dim_128_cosine_precision@5": 0.29334186939820744,
"eval_dim_128_cosine_recall@1": 0.03621488699964182,
"eval_dim_128_cosine_recall@10": 0.23934767939840923,
"eval_dim_128_cosine_recall@3": 0.10619628777590438,
"eval_dim_128_cosine_recall@5": 0.16065683687574547,
"eval_dim_256_cosine_accuracy@1": 0.3181818181818182,
"eval_dim_256_cosine_accuracy@10": 0.39244558258642764,
"eval_dim_256_cosine_accuracy@3": 0.323303457106274,
"eval_dim_256_cosine_accuracy@5": 0.35723431498079383,
"eval_dim_256_cosine_map@100": 0.4066610643364293,
"eval_dim_256_cosine_mrr@10": 0.3309775318578131,
"eval_dim_256_cosine_ndcg@10": 0.34364332074782783,
"eval_dim_256_cosine_precision@1": 0.3181818181818182,
"eval_dim_256_cosine_precision@10": 0.28348271446862994,
"eval_dim_256_cosine_precision@3": 0.3175416133162612,
"eval_dim_256_cosine_precision@5": 0.31024327784891165,
"eval_dim_256_cosine_recall@1": 0.03856083314138909,
"eval_dim_256_cosine_recall@10": 0.2532060107296034,
"eval_dim_256_cosine_recall@3": 0.11327276127499253,
"eval_dim_256_cosine_recall@5": 0.17121955970972744,
"eval_dim_512_cosine_accuracy@1": 0.33674775928297057,
"eval_dim_512_cosine_accuracy@10": 0.41613316261203587,
"eval_dim_512_cosine_accuracy@3": 0.34314980793854033,
"eval_dim_512_cosine_accuracy@5": 0.37708066581306016,
"eval_dim_512_cosine_map@100": 0.4299582620106213,
"eval_dim_512_cosine_mrr@10": 0.3503564315184028,
"eval_dim_512_cosine_ndcg@10": 0.362727691265461,
"eval_dim_512_cosine_precision@1": 0.33674775928297057,
"eval_dim_512_cosine_precision@10": 0.2976312419974392,
"eval_dim_512_cosine_precision@3": 0.3363209560392659,
"eval_dim_512_cosine_precision@5": 0.32816901408450705,
"eval_dim_512_cosine_recall@1": 0.04109877030791802,
"eval_dim_512_cosine_recall@10": 0.26524116778193035,
"eval_dim_512_cosine_recall@3": 0.12056066628670095,
"eval_dim_512_cosine_recall@5": 0.18158399214837667,
"eval_dim_64_cosine_accuracy@1": 0.24647887323943662,
"eval_dim_64_cosine_accuracy@10": 0.31562099871959026,
"eval_dim_64_cosine_accuracy@3": 0.25096030729833546,
"eval_dim_64_cosine_accuracy@5": 0.27784891165172854,
"eval_dim_64_cosine_map@100": 0.32717183255723853,
"eval_dim_64_cosine_mrr@10": 0.25797791801312897,
"eval_dim_64_cosine_ndcg@10": 0.2694272980700995,
"eval_dim_64_cosine_precision@1": 0.24647887323943662,
"eval_dim_64_cosine_precision@10": 0.22029449423815622,
"eval_dim_64_cosine_precision@3": 0.24647887323943662,
"eval_dim_64_cosine_precision@5": 0.24033290653008965,
"eval_dim_64_cosine_recall@1": 0.030698008105366027,
"eval_dim_64_cosine_recall@10": 0.20675794932386124,
"eval_dim_64_cosine_recall@3": 0.0905145081182266,
"eval_dim_64_cosine_recall@5": 0.13744818119581018,
"eval_dim_768_cosine_accuracy@1": 0.3412291933418694,
"eval_dim_768_cosine_accuracy@10": 0.4174135723431498,
"eval_dim_768_cosine_accuracy@3": 0.34763124199743917,
"eval_dim_768_cosine_accuracy@5": 0.37964148527528807,
"eval_dim_768_cosine_map@100": 0.4350972821264766,
"eval_dim_768_cosine_mrr@10": 0.35454393024815517,
"eval_dim_768_cosine_ndcg@10": 0.36777711697459586,
"eval_dim_768_cosine_precision@1": 0.3412291933418694,
"eval_dim_768_cosine_precision@10": 0.3030089628681178,
"eval_dim_768_cosine_precision@3": 0.3414425949637217,
"eval_dim_768_cosine_precision@5": 0.33341869398207424,
"eval_dim_768_cosine_recall@1": 0.041218661006119914,
"eval_dim_768_cosine_recall@10": 0.2695735755366756,
"eval_dim_768_cosine_recall@3": 0.12127491338376899,
"eval_dim_768_cosine_recall@5": 0.18314999106768198,
"eval_runtime": 98.9256,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.2694272980700995,
"eval_steps_per_second": 0.0,
"step": 98
},
{
"epoch": 1.010204081632653,
"grad_norm": 164.98011779785156,
"learning_rate": 1e-05,
"loss": 0.9602,
"step": 99
},
{
"epoch": 1.0204081632653061,
"grad_norm": 928.5584106445312,
"learning_rate": 1.0102040816326531e-05,
"loss": 5.0193,
"step": 100
},
{
"epoch": 1.030612244897959,
"grad_norm": 144.56446838378906,
"learning_rate": 1.0204081632653063e-05,
"loss": 1.1252,
"step": 101
},
{
"epoch": 1.0408163265306123,
"grad_norm": 185.31761169433594,
"learning_rate": 1.0306122448979591e-05,
"loss": 0.7896,
"step": 102
},
{
"epoch": 1.0510204081632653,
"grad_norm": 331.85076904296875,
"learning_rate": 1.0408163265306123e-05,
"loss": 1.2793,
"step": 103
},
{
"epoch": 1.0612244897959184,
"grad_norm": 70.96832275390625,
"learning_rate": 1.0510204081632654e-05,
"loss": 0.3422,
"step": 104
},
{
"epoch": 1.0714285714285714,
"grad_norm": 5.096808910369873,
"learning_rate": 1.0612244897959186e-05,
"loss": 0.0204,
"step": 105
},
{
"epoch": 1.0816326530612246,
"grad_norm": 5.112201690673828,
"learning_rate": 1.0714285714285714e-05,
"loss": 0.018,
"step": 106
},
{
"epoch": 1.0918367346938775,
"grad_norm": 1.532448410987854,
"learning_rate": 1.0816326530612246e-05,
"loss": 0.0082,
"step": 107
},
{
"epoch": 1.1020408163265305,
"grad_norm": 784.9140625,
"learning_rate": 1.0918367346938776e-05,
"loss": 6.0895,
"step": 108
},
{
"epoch": 1.1122448979591837,
"grad_norm": 3.4372496604919434,
"learning_rate": 1.1020408163265306e-05,
"loss": 0.0115,
"step": 109
},
{
"epoch": 1.1224489795918366,
"grad_norm": 45.12334442138672,
"learning_rate": 1.1122448979591838e-05,
"loss": 0.2657,
"step": 110
},
{
"epoch": 1.1326530612244898,
"grad_norm": 3.1384634971618652,
"learning_rate": 1.1224489795918367e-05,
"loss": 0.0232,
"step": 111
},
{
"epoch": 1.1428571428571428,
"grad_norm": 225.58758544921875,
"learning_rate": 1.1326530612244899e-05,
"loss": 1.4261,
"step": 112
},
{
"epoch": 1.153061224489796,
"grad_norm": 858.461181640625,
"learning_rate": 1.1428571428571429e-05,
"loss": 5.6396,
"step": 113
},
{
"epoch": 1.163265306122449,
"grad_norm": 57.779808044433594,
"learning_rate": 1.1530612244897961e-05,
"loss": 0.2395,
"step": 114
},
{
"epoch": 1.1734693877551021,
"grad_norm": 0.24931341409683228,
"learning_rate": 1.1632653061224491e-05,
"loss": 0.001,
"step": 115
},
{
"epoch": 1.183673469387755,
"grad_norm": 307.6712646484375,
"learning_rate": 1.1734693877551021e-05,
"loss": 1.053,
"step": 116
},
{
"epoch": 1.193877551020408,
"grad_norm": 7.2576212882995605,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.0335,
"step": 117
},
{
"epoch": 1.2040816326530612,
"grad_norm": 367.11077880859375,
"learning_rate": 1.1938775510204084e-05,
"loss": 1.9711,
"step": 118
},
{
"epoch": 1.2142857142857142,
"grad_norm": 433.3495178222656,
"learning_rate": 1.2040816326530614e-05,
"loss": 1.7967,
"step": 119
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.5050230026245117,
"learning_rate": 1.2142857142857142e-05,
"loss": 0.0046,
"step": 120
},
{
"epoch": 1.2346938775510203,
"grad_norm": 0.04484110698103905,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.0002,
"step": 121
},
{
"epoch": 1.2448979591836735,
"grad_norm": 43.718109130859375,
"learning_rate": 1.2346938775510204e-05,
"loss": 0.0585,
"step": 122
},
{
"epoch": 1.2551020408163265,
"grad_norm": 109.87353515625,
"learning_rate": 1.2448979591836736e-05,
"loss": 0.3547,
"step": 123
},
{
"epoch": 1.2653061224489797,
"grad_norm": 952.017578125,
"learning_rate": 1.2551020408163267e-05,
"loss": 6.193,
"step": 124
},
{
"epoch": 1.2755102040816326,
"grad_norm": 2.541386365890503,
"learning_rate": 1.2653061224489798e-05,
"loss": 0.0073,
"step": 125
},
{
"epoch": 1.2857142857142856,
"grad_norm": 116.8653793334961,
"learning_rate": 1.2755102040816327e-05,
"loss": 0.3095,
"step": 126
},
{
"epoch": 1.2959183673469388,
"grad_norm": 1.1585338115692139,
"learning_rate": 1.2857142857142859e-05,
"loss": 0.0026,
"step": 127
},
{
"epoch": 1.306122448979592,
"grad_norm": 2.4128425121307373,
"learning_rate": 1.2959183673469389e-05,
"loss": 0.0065,
"step": 128
},
{
"epoch": 1.316326530612245,
"grad_norm": 11.24067497253418,
"learning_rate": 1.3061224489795918e-05,
"loss": 0.0326,
"step": 129
},
{
"epoch": 1.3265306122448979,
"grad_norm": 3.200199842453003,
"learning_rate": 1.316326530612245e-05,
"loss": 0.0121,
"step": 130
},
{
"epoch": 1.336734693877551,
"grad_norm": 314.07379150390625,
"learning_rate": 1.326530612244898e-05,
"loss": 2.081,
"step": 131
},
{
"epoch": 1.346938775510204,
"grad_norm": 11.617761611938477,
"learning_rate": 1.3367346938775512e-05,
"loss": 0.0329,
"step": 132
},
{
"epoch": 1.3571428571428572,
"grad_norm": 579.8010864257812,
"learning_rate": 1.3469387755102042e-05,
"loss": 4.8144,
"step": 133
},
{
"epoch": 1.3673469387755102,
"grad_norm": 502.57574462890625,
"learning_rate": 1.3571428571428574e-05,
"loss": 1.8287,
"step": 134
},
{
"epoch": 1.3775510204081631,
"grad_norm": 0.3074304163455963,
"learning_rate": 1.3673469387755102e-05,
"loss": 0.0016,
"step": 135
},
{
"epoch": 1.3877551020408163,
"grad_norm": 674.5262451171875,
"learning_rate": 1.3775510204081634e-05,
"loss": 2.7057,
"step": 136
},
{
"epoch": 1.3979591836734695,
"grad_norm": 2.085172176361084,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.0087,
"step": 137
},
{
"epoch": 1.4081632653061225,
"grad_norm": 146.2124786376953,
"learning_rate": 1.3979591836734696e-05,
"loss": 0.7368,
"step": 138
},
{
"epoch": 1.4183673469387754,
"grad_norm": 39.309967041015625,
"learning_rate": 1.4081632653061225e-05,
"loss": 0.1354,
"step": 139
},
{
"epoch": 1.4285714285714286,
"grad_norm": 18.70266342163086,
"learning_rate": 1.4183673469387755e-05,
"loss": 0.0446,
"step": 140
},
{
"epoch": 1.4387755102040816,
"grad_norm": 60.91520690917969,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.2849,
"step": 141
},
{
"epoch": 1.4489795918367347,
"grad_norm": 560.072509765625,
"learning_rate": 1.4387755102040817e-05,
"loss": 6.2924,
"step": 142
},
{
"epoch": 1.4591836734693877,
"grad_norm": 161.52471923828125,
"learning_rate": 1.448979591836735e-05,
"loss": 0.4827,
"step": 143
},
{
"epoch": 1.469387755102041,
"grad_norm": 870.7469482421875,
"learning_rate": 1.4591836734693878e-05,
"loss": 7.8315,
"step": 144
},
{
"epoch": 1.4795918367346939,
"grad_norm": 716.3013305664062,
"learning_rate": 1.469387755102041e-05,
"loss": 6.0618,
"step": 145
},
{
"epoch": 1.489795918367347,
"grad_norm": 242.5034637451172,
"learning_rate": 1.479591836734694e-05,
"loss": 1.0472,
"step": 146
},
{
"epoch": 1.5,
"grad_norm": 0.19287815690040588,
"learning_rate": 1.4897959183673472e-05,
"loss": 0.0007,
"step": 147
},
{
"epoch": 1.510204081632653,
"grad_norm": 14.570304870605469,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.0433,
"step": 148
},
{
"epoch": 1.5204081632653061,
"grad_norm": 183.6922607421875,
"learning_rate": 1.510204081632653e-05,
"loss": 1.116,
"step": 149
},
{
"epoch": 1.5306122448979593,
"grad_norm": 390.4358825683594,
"learning_rate": 1.5204081632653063e-05,
"loss": 1.5491,
"step": 150
},
{
"epoch": 1.5408163265306123,
"grad_norm": 69.31669616699219,
"learning_rate": 1.530612244897959e-05,
"loss": 0.2423,
"step": 151
},
{
"epoch": 1.5510204081632653,
"grad_norm": 125.4091796875,
"learning_rate": 1.5408163265306123e-05,
"loss": 0.4355,
"step": 152
},
{
"epoch": 1.5612244897959182,
"grad_norm": 1.3179243803024292,
"learning_rate": 1.5510204081632655e-05,
"loss": 0.0043,
"step": 153
},
{
"epoch": 1.5714285714285714,
"grad_norm": 18.853076934814453,
"learning_rate": 1.5612244897959187e-05,
"loss": 0.059,
"step": 154
},
{
"epoch": 1.5816326530612246,
"grad_norm": 4.13621711730957,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.0175,
"step": 155
},
{
"epoch": 1.5918367346938775,
"grad_norm": 265.3294982910156,
"learning_rate": 1.5816326530612247e-05,
"loss": 2.8813,
"step": 156
},
{
"epoch": 1.6020408163265305,
"grad_norm": 123.54573822021484,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.4372,
"step": 157
},
{
"epoch": 1.6122448979591837,
"grad_norm": 16.84331512451172,
"learning_rate": 1.6020408163265308e-05,
"loss": 0.0611,
"step": 158
},
{
"epoch": 1.6224489795918369,
"grad_norm": 306.5478820800781,
"learning_rate": 1.612244897959184e-05,
"loss": 4.6339,
"step": 159
},
{
"epoch": 1.6326530612244898,
"grad_norm": 482.53204345703125,
"learning_rate": 1.6224489795918368e-05,
"loss": 2.1581,
"step": 160
},
{
"epoch": 1.6428571428571428,
"grad_norm": 410.847900390625,
"learning_rate": 1.63265306122449e-05,
"loss": 1.9109,
"step": 161
},
{
"epoch": 1.6530612244897958,
"grad_norm": 1973.649658203125,
"learning_rate": 1.642857142857143e-05,
"loss": 10.7888,
"step": 162
},
{
"epoch": 1.663265306122449,
"grad_norm": 184.83628845214844,
"learning_rate": 1.653061224489796e-05,
"loss": 4.4287,
"step": 163
},
{
"epoch": 1.6734693877551021,
"grad_norm": 697.2540893554688,
"learning_rate": 1.6632653061224492e-05,
"loss": 4.1106,
"step": 164
},
{
"epoch": 1.683673469387755,
"grad_norm": 513.15966796875,
"learning_rate": 1.673469387755102e-05,
"loss": 3.8159,
"step": 165
},
{
"epoch": 1.693877551020408,
"grad_norm": 23.47126007080078,
"learning_rate": 1.6836734693877553e-05,
"loss": 0.0468,
"step": 166
},
{
"epoch": 1.7040816326530612,
"grad_norm": 0.6484940648078918,
"learning_rate": 1.6938775510204085e-05,
"loss": 0.0023,
"step": 167
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.809281051158905,
"learning_rate": 1.7040816326530613e-05,
"loss": 0.0031,
"step": 168
},
{
"epoch": 1.7244897959183674,
"grad_norm": 648.1061401367188,
"learning_rate": 1.7142857142857142e-05,
"loss": 3.0379,
"step": 169
},
{
"epoch": 1.7346938775510203,
"grad_norm": 1.748665690422058,
"learning_rate": 1.7244897959183674e-05,
"loss": 0.0058,
"step": 170
},
{
"epoch": 1.7448979591836735,
"grad_norm": 2.6888437271118164,
"learning_rate": 1.7346938775510206e-05,
"loss": 0.0097,
"step": 171
},
{
"epoch": 1.7551020408163265,
"grad_norm": 44.931114196777344,
"learning_rate": 1.7448979591836738e-05,
"loss": 0.114,
"step": 172
},
{
"epoch": 1.7653061224489797,
"grad_norm": 17.355039596557617,
"learning_rate": 1.7551020408163266e-05,
"loss": 0.0376,
"step": 173
},
{
"epoch": 1.7755102040816326,
"grad_norm": 0.23637208342552185,
"learning_rate": 1.7653061224489798e-05,
"loss": 0.0006,
"step": 174
},
{
"epoch": 1.7857142857142856,
"grad_norm": 879.6422119140625,
"learning_rate": 1.7755102040816327e-05,
"loss": 1.7519,
"step": 175
},
{
"epoch": 1.7959183673469388,
"grad_norm": 783.4127807617188,
"learning_rate": 1.785714285714286e-05,
"loss": 3.5166,
"step": 176
},
{
"epoch": 1.806122448979592,
"grad_norm": 431.9672546386719,
"learning_rate": 1.795918367346939e-05,
"loss": 2.073,
"step": 177
},
{
"epoch": 1.816326530612245,
"grad_norm": 25.601001739501953,
"learning_rate": 1.806122448979592e-05,
"loss": 0.1532,
"step": 178
},
{
"epoch": 1.8265306122448979,
"grad_norm": 365.6300048828125,
"learning_rate": 1.816326530612245e-05,
"loss": 2.0969,
"step": 179
},
{
"epoch": 1.836734693877551,
"grad_norm": 620.0676879882812,
"learning_rate": 1.826530612244898e-05,
"loss": 1.867,
"step": 180
},
{
"epoch": 1.8469387755102042,
"grad_norm": 1705.588134765625,
"learning_rate": 1.836734693877551e-05,
"loss": 18.7505,
"step": 181
},
{
"epoch": 1.8571428571428572,
"grad_norm": 683.8563842773438,
"learning_rate": 1.8469387755102043e-05,
"loss": 2.5291,
"step": 182
},
{
"epoch": 1.8673469387755102,
"grad_norm": 312.474609375,
"learning_rate": 1.8571428571428575e-05,
"loss": 2.8375,
"step": 183
},
{
"epoch": 1.8775510204081631,
"grad_norm": 37.57782745361328,
"learning_rate": 1.8673469387755104e-05,
"loss": 0.0902,
"step": 184
},
{
"epoch": 1.8877551020408163,
"grad_norm": 8.877613067626953,
"learning_rate": 1.8775510204081636e-05,
"loss": 0.0139,
"step": 185
},
{
"epoch": 1.8979591836734695,
"grad_norm": 8.716789245605469,
"learning_rate": 1.8877551020408164e-05,
"loss": 0.0356,
"step": 186
},
{
"epoch": 1.9081632653061225,
"grad_norm": 21.247528076171875,
"learning_rate": 1.8979591836734696e-05,
"loss": 0.0838,
"step": 187
},
{
"epoch": 1.9183673469387754,
"grad_norm": 9.923478126525879,
"learning_rate": 1.9081632653061225e-05,
"loss": 0.0391,
"step": 188
},
{
"epoch": 1.9285714285714286,
"grad_norm": 314.7723388671875,
"learning_rate": 1.9183673469387756e-05,
"loss": 1.2579,
"step": 189
},
{
"epoch": 1.9387755102040818,
"grad_norm": 1043.2823486328125,
"learning_rate": 1.928571428571429e-05,
"loss": 9.3381,
"step": 190
},
{
"epoch": 1.9489795918367347,
"grad_norm": 26.932323455810547,
"learning_rate": 1.9387755102040817e-05,
"loss": 0.094,
"step": 191
},
{
"epoch": 1.9591836734693877,
"grad_norm": 19.69426727294922,
"learning_rate": 1.948979591836735e-05,
"loss": 0.0638,
"step": 192
},
{
"epoch": 1.9693877551020407,
"grad_norm": 537.5110473632812,
"learning_rate": 1.9591836734693877e-05,
"loss": 4.3027,
"step": 193
},
{
"epoch": 1.9795918367346939,
"grad_norm": 0.6821572780609131,
"learning_rate": 1.969387755102041e-05,
"loss": 0.002,
"step": 194
},
{
"epoch": 1.989795918367347,
"grad_norm": 476.9329528808594,
"learning_rate": 1.979591836734694e-05,
"loss": 0.9772,
"step": 195
},
{
"epoch": 2.0,
"grad_norm": 2.22475266456604,
"learning_rate": 1.9897959183673473e-05,
"loss": 0.0053,
"step": 196
},
{
"epoch": 2.0,
"eval_dim_1024_cosine_accuracy@1": 0.3290653008962868,
"eval_dim_1024_cosine_accuracy@10": 0.3886043533930858,
"eval_dim_1024_cosine_accuracy@3": 0.3348271446862996,
"eval_dim_1024_cosine_accuracy@5": 0.3559539052496799,
"eval_dim_1024_cosine_map@100": 0.4165482880126111,
"eval_dim_1024_cosine_mrr@10": 0.3392725037091231,
"eval_dim_1024_cosine_ndcg@10": 0.34967137880514326,
"eval_dim_1024_cosine_precision@1": 0.3290653008962868,
"eval_dim_1024_cosine_precision@10": 0.28380281690140846,
"eval_dim_1024_cosine_precision@3": 0.32885189927443453,
"eval_dim_1024_cosine_precision@5": 0.31869398207426375,
"eval_dim_1024_cosine_recall@1": 0.04062540337753272,
"eval_dim_1024_cosine_recall@10": 0.2609802153031206,
"eval_dim_1024_cosine_recall@3": 0.11937529555421877,
"eval_dim_1024_cosine_recall@5": 0.17929032559391017,
"eval_dim_128_cosine_accuracy@1": 0.3028169014084507,
"eval_dim_128_cosine_accuracy@10": 0.354033290653009,
"eval_dim_128_cosine_accuracy@3": 0.3066581306017926,
"eval_dim_128_cosine_accuracy@5": 0.3258642765685019,
"eval_dim_128_cosine_map@100": 0.3737489129899149,
"eval_dim_128_cosine_mrr@10": 0.31138141983212375,
"eval_dim_128_cosine_ndcg@10": 0.3200987320599894,
"eval_dim_128_cosine_precision@1": 0.3028169014084507,
"eval_dim_128_cosine_precision@10": 0.26325224071702946,
"eval_dim_128_cosine_precision@3": 0.30217669654289375,
"eval_dim_128_cosine_precision@5": 0.29334186939820744,
"eval_dim_128_cosine_recall@1": 0.03581534845465155,
"eval_dim_128_cosine_recall@10": 0.23457162530017844,
"eval_dim_128_cosine_recall@3": 0.10498018962345104,
"eval_dim_128_cosine_recall@5": 0.15825094621698793,
"eval_dim_256_cosine_accuracy@1": 0.3111395646606914,
"eval_dim_256_cosine_accuracy@10": 0.36619718309859156,
"eval_dim_256_cosine_accuracy@3": 0.31882202304737517,
"eval_dim_256_cosine_accuracy@5": 0.3418693982074264,
"eval_dim_256_cosine_map@100": 0.3876570902519949,
"eval_dim_256_cosine_mrr@10": 0.32126318517163555,
"eval_dim_256_cosine_ndcg@10": 0.3316834258973034,
"eval_dim_256_cosine_precision@1": 0.3111395646606914,
"eval_dim_256_cosine_precision@10": 0.2723431498079385,
"eval_dim_256_cosine_precision@3": 0.31156636790439607,
"eval_dim_256_cosine_precision@5": 0.30371318822023047,
"eval_dim_256_cosine_recall@1": 0.03702717845490271,
"eval_dim_256_cosine_recall@10": 0.24388584743594785,
"eval_dim_256_cosine_recall@3": 0.10903486138141442,
"eval_dim_256_cosine_recall@5": 0.16522998831931382,
"eval_dim_512_cosine_accuracy@1": 0.324583866837388,
"eval_dim_512_cosine_accuracy@10": 0.3886043533930858,
"eval_dim_512_cosine_accuracy@3": 0.33034571062740076,
"eval_dim_512_cosine_accuracy@5": 0.3553137003841229,
"eval_dim_512_cosine_map@100": 0.4105799203347045,
"eval_dim_512_cosine_mrr@10": 0.3356639839034201,
"eval_dim_512_cosine_ndcg@10": 0.34755602204164354,
"eval_dim_512_cosine_precision@1": 0.324583866837388,
"eval_dim_512_cosine_precision@10": 0.28425096030729835,
"eval_dim_512_cosine_precision@3": 0.3243704652155356,
"eval_dim_512_cosine_precision@5": 0.31549295774647884,
"eval_dim_512_cosine_recall@1": 0.039408176645563966,
"eval_dim_512_cosine_recall@10": 0.2588290716980974,
"eval_dim_512_cosine_recall@3": 0.11569400881462148,
"eval_dim_512_cosine_recall@5": 0.17452688474231048,
"eval_dim_64_cosine_accuracy@1": 0.264404609475032,
"eval_dim_64_cosine_accuracy@10": 0.3220230473751601,
"eval_dim_64_cosine_accuracy@3": 0.26952624839948786,
"eval_dim_64_cosine_accuracy@5": 0.2912932138284251,
"eval_dim_64_cosine_map@100": 0.3356052539796121,
"eval_dim_64_cosine_mrr@10": 0.27433820092270755,
"eval_dim_64_cosine_ndcg@10": 0.28363892738216534,
"eval_dim_64_cosine_precision@1": 0.264404609475032,
"eval_dim_64_cosine_precision@10": 0.23399487836107555,
"eval_dim_64_cosine_precision@3": 0.2639778062313273,
"eval_dim_64_cosine_precision@5": 0.2573623559539053,
"eval_dim_64_cosine_recall@1": 0.03137978486480133,
"eval_dim_64_cosine_recall@10": 0.2079536154587263,
"eval_dim_64_cosine_recall@3": 0.09184879304327909,
"eval_dim_64_cosine_recall@5": 0.13906413978147564,
"eval_dim_768_cosine_accuracy@1": 0.3290653008962868,
"eval_dim_768_cosine_accuracy@10": 0.3911651728553137,
"eval_dim_768_cosine_accuracy@3": 0.3348271446862996,
"eval_dim_768_cosine_accuracy@5": 0.3565941101152369,
"eval_dim_768_cosine_map@100": 0.41513115137941903,
"eval_dim_768_cosine_mrr@10": 0.3396022600247949,
"eval_dim_768_cosine_ndcg@10": 0.35038934007937644,
"eval_dim_768_cosine_precision@1": 0.3290653008962868,
"eval_dim_768_cosine_precision@10": 0.2860435339308579,
"eval_dim_768_cosine_precision@3": 0.32885189927443453,
"eval_dim_768_cosine_precision@5": 0.31907810499359796,
"eval_dim_768_cosine_recall@1": 0.040070803135958795,
"eval_dim_768_cosine_recall@10": 0.2600215922621299,
"eval_dim_768_cosine_recall@3": 0.11769625185650755,
"eval_dim_768_cosine_recall@5": 0.17699013287798807,
"eval_runtime": 98.8385,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.28363892738216534,
"eval_steps_per_second": 0.0,
"step": 196
}
],
"logging_steps": 1,
"max_steps": 1960,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}