{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 490,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01020408163265306,
"grad_norm": 1.5778175592422485,
"learning_rate": 0.0,
"loss": 0.1624,
"step": 1
},
{
"epoch": 0.02040816326530612,
"grad_norm": 1.3363069295883179,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.1369,
"step": 2
},
{
"epoch": 0.030612244897959183,
"grad_norm": 1.248914361000061,
"learning_rate": 8.163265306122449e-07,
"loss": 0.1151,
"step": 3
},
{
"epoch": 0.04081632653061224,
"grad_norm": 1.4152894020080566,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.1031,
"step": 4
},
{
"epoch": 0.05102040816326531,
"grad_norm": 1.2651602029800415,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.097,
"step": 5
},
{
"epoch": 0.061224489795918366,
"grad_norm": 1.289127230644226,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.095,
"step": 6
},
{
"epoch": 0.07142857142857142,
"grad_norm": 1.1998013257980347,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.1236,
"step": 7
},
{
"epoch": 0.08163265306122448,
"grad_norm": 1.538970947265625,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.1177,
"step": 8
},
{
"epoch": 0.09183673469387756,
"grad_norm": 1.2269916534423828,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.0931,
"step": 9
},
{
"epoch": 0.10204081632653061,
"grad_norm": 1.2894030809402466,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.1049,
"step": 10
},
{
"epoch": 0.11224489795918367,
"grad_norm": 0.9413341283798218,
"learning_rate": 4.081632653061225e-06,
"loss": 0.0757,
"step": 11
},
{
"epoch": 0.12244897959183673,
"grad_norm": 1.1163856983184814,
"learning_rate": 4.489795918367348e-06,
"loss": 0.0936,
"step": 12
},
{
"epoch": 0.1326530612244898,
"grad_norm": 0.997565507888794,
"learning_rate": 4.897959183673469e-06,
"loss": 0.0797,
"step": 13
},
{
"epoch": 0.14285714285714285,
"grad_norm": 1.0046685934066772,
"learning_rate": 5.306122448979593e-06,
"loss": 0.0855,
"step": 14
},
{
"epoch": 0.15306122448979592,
"grad_norm": 0.9205936789512634,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.079,
"step": 15
},
{
"epoch": 0.16326530612244897,
"grad_norm": 0.8712719678878784,
"learning_rate": 6.122448979591837e-06,
"loss": 0.0666,
"step": 16
},
{
"epoch": 0.17346938775510204,
"grad_norm": 0.9426755905151367,
"learning_rate": 6.530612244897959e-06,
"loss": 0.073,
"step": 17
},
{
"epoch": 0.1836734693877551,
"grad_norm": 0.8015092611312866,
"learning_rate": 6.938775510204082e-06,
"loss": 0.0669,
"step": 18
},
{
"epoch": 0.19387755102040816,
"grad_norm": 0.7584081292152405,
"learning_rate": 7.346938775510205e-06,
"loss": 0.0517,
"step": 19
},
{
"epoch": 0.20408163265306123,
"grad_norm": 0.7984261512756348,
"learning_rate": 7.755102040816327e-06,
"loss": 0.0667,
"step": 20
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.7533179521560669,
"learning_rate": 8.16326530612245e-06,
"loss": 0.0639,
"step": 21
},
{
"epoch": 0.22448979591836735,
"grad_norm": 0.9156713485717773,
"learning_rate": 8.571428571428571e-06,
"loss": 0.0729,
"step": 22
},
{
"epoch": 0.23469387755102042,
"grad_norm": 0.6727928519248962,
"learning_rate": 8.979591836734695e-06,
"loss": 0.0565,
"step": 23
},
{
"epoch": 0.24489795918367346,
"grad_norm": 0.5908196568489075,
"learning_rate": 9.387755102040818e-06,
"loss": 0.0501,
"step": 24
},
{
"epoch": 0.25510204081632654,
"grad_norm": 0.5994157195091248,
"learning_rate": 9.795918367346939e-06,
"loss": 0.0596,
"step": 25
},
{
"epoch": 0.2653061224489796,
"grad_norm": 0.5699151754379272,
"learning_rate": 1.0204081632653063e-05,
"loss": 0.0478,
"step": 26
},
{
"epoch": 0.2755102040816326,
"grad_norm": 0.41071373224258423,
"learning_rate": 1.0612244897959186e-05,
"loss": 0.0306,
"step": 27
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.6520228981971741,
"learning_rate": 1.1020408163265306e-05,
"loss": 0.0509,
"step": 28
},
{
"epoch": 0.29591836734693877,
"grad_norm": 0.5062035918235779,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.0415,
"step": 29
},
{
"epoch": 0.30612244897959184,
"grad_norm": 0.5349479913711548,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.0396,
"step": 30
},
{
"epoch": 0.3163265306122449,
"grad_norm": 0.29080551862716675,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.0215,
"step": 31
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.5437124371528625,
"learning_rate": 1.2653061224489798e-05,
"loss": 0.0402,
"step": 32
},
{
"epoch": 0.336734693877551,
"grad_norm": 0.7592443823814392,
"learning_rate": 1.3061224489795918e-05,
"loss": 0.0692,
"step": 33
},
{
"epoch": 0.3469387755102041,
"grad_norm": 0.6122593283653259,
"learning_rate": 1.3469387755102042e-05,
"loss": 0.0602,
"step": 34
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.31229618191719055,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.0215,
"step": 35
},
{
"epoch": 0.3673469387755102,
"grad_norm": 0.37940043210983276,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.0274,
"step": 36
},
{
"epoch": 0.37755102040816324,
"grad_norm": 0.2848958969116211,
"learning_rate": 1.469387755102041e-05,
"loss": 0.0212,
"step": 37
},
{
"epoch": 0.3877551020408163,
"grad_norm": 0.3313491940498352,
"learning_rate": 1.510204081632653e-05,
"loss": 0.0231,
"step": 38
},
{
"epoch": 0.3979591836734694,
"grad_norm": 0.24816128611564636,
"learning_rate": 1.5510204081632655e-05,
"loss": 0.0159,
"step": 39
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.2411227524280548,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.0154,
"step": 40
},
{
"epoch": 0.41836734693877553,
"grad_norm": 0.20029953122138977,
"learning_rate": 1.63265306122449e-05,
"loss": 0.013,
"step": 41
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.23513872921466827,
"learning_rate": 1.673469387755102e-05,
"loss": 0.0144,
"step": 42
},
{
"epoch": 0.4387755102040816,
"grad_norm": 0.4002116918563843,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.0353,
"step": 43
},
{
"epoch": 0.4489795918367347,
"grad_norm": 0.27058476209640503,
"learning_rate": 1.7551020408163266e-05,
"loss": 0.0169,
"step": 44
},
{
"epoch": 0.45918367346938777,
"grad_norm": 0.09177622944116592,
"learning_rate": 1.795918367346939e-05,
"loss": 0.0055,
"step": 45
},
{
"epoch": 0.46938775510204084,
"grad_norm": 0.16117192804813385,
"learning_rate": 1.836734693877551e-05,
"loss": 0.0098,
"step": 46
},
{
"epoch": 0.47959183673469385,
"grad_norm": 0.1534506231546402,
"learning_rate": 1.8775510204081636e-05,
"loss": 0.0071,
"step": 47
},
{
"epoch": 0.4897959183673469,
"grad_norm": 0.2407277524471283,
"learning_rate": 1.9183673469387756e-05,
"loss": 0.0167,
"step": 48
},
{
"epoch": 0.5,
"grad_norm": 0.1129893809556961,
"learning_rate": 1.9591836734693877e-05,
"loss": 0.0062,
"step": 49
},
{
"epoch": 0.5102040816326531,
"grad_norm": 0.15642525255680084,
"learning_rate": 2e-05,
"loss": 0.0064,
"step": 50
},
{
"epoch": 0.5204081632653061,
"grad_norm": 0.30184754729270935,
"learning_rate": 1.9954648526077098e-05,
"loss": 0.0125,
"step": 51
},
{
"epoch": 0.5306122448979592,
"grad_norm": 0.07106052339076996,
"learning_rate": 1.9909297052154198e-05,
"loss": 0.0044,
"step": 52
},
{
"epoch": 0.5408163265306123,
"grad_norm": 0.26723626255989075,
"learning_rate": 1.9863945578231295e-05,
"loss": 0.0193,
"step": 53
},
{
"epoch": 0.5510204081632653,
"grad_norm": 0.09843797981739044,
"learning_rate": 1.981859410430839e-05,
"loss": 0.0058,
"step": 54
},
{
"epoch": 0.5612244897959183,
"grad_norm": 0.07936914265155792,
"learning_rate": 1.977324263038549e-05,
"loss": 0.0043,
"step": 55
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.06574582308530807,
"learning_rate": 1.9727891156462588e-05,
"loss": 0.0036,
"step": 56
},
{
"epoch": 0.5816326530612245,
"grad_norm": 0.030592354014515877,
"learning_rate": 1.9682539682539684e-05,
"loss": 0.0018,
"step": 57
},
{
"epoch": 0.5918367346938775,
"grad_norm": 0.07422778010368347,
"learning_rate": 1.963718820861678e-05,
"loss": 0.0039,
"step": 58
},
{
"epoch": 0.6020408163265306,
"grad_norm": 0.05910489708185196,
"learning_rate": 1.9591836734693877e-05,
"loss": 0.0031,
"step": 59
},
{
"epoch": 0.6122448979591837,
"grad_norm": 0.035012971609830856,
"learning_rate": 1.9546485260770977e-05,
"loss": 0.0019,
"step": 60
},
{
"epoch": 0.6224489795918368,
"grad_norm": 0.05377289652824402,
"learning_rate": 1.9501133786848074e-05,
"loss": 0.003,
"step": 61
},
{
"epoch": 0.6326530612244898,
"grad_norm": 0.059411946684122086,
"learning_rate": 1.945578231292517e-05,
"loss": 0.003,
"step": 62
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.07829850167036057,
"learning_rate": 1.941043083900227e-05,
"loss": 0.0039,
"step": 63
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.1004122868180275,
"learning_rate": 1.9365079365079367e-05,
"loss": 0.0048,
"step": 64
},
{
"epoch": 0.6632653061224489,
"grad_norm": 0.022585352882742882,
"learning_rate": 1.9319727891156463e-05,
"loss": 0.0013,
"step": 65
},
{
"epoch": 0.673469387755102,
"grad_norm": 0.08342932909727097,
"learning_rate": 1.9274376417233563e-05,
"loss": 0.0039,
"step": 66
},
{
"epoch": 0.6836734693877551,
"grad_norm": 0.3428645730018616,
"learning_rate": 1.922902494331066e-05,
"loss": 0.0113,
"step": 67
},
{
"epoch": 0.6938775510204082,
"grad_norm": 0.08267664909362793,
"learning_rate": 1.9183673469387756e-05,
"loss": 0.0042,
"step": 68
},
{
"epoch": 0.7040816326530612,
"grad_norm": 0.07195252925157547,
"learning_rate": 1.9138321995464853e-05,
"loss": 0.0029,
"step": 69
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.024874132126569748,
"learning_rate": 1.9092970521541953e-05,
"loss": 0.0014,
"step": 70
},
{
"epoch": 0.7244897959183674,
"grad_norm": 0.03532341867685318,
"learning_rate": 1.904761904761905e-05,
"loss": 0.0012,
"step": 71
},
{
"epoch": 0.7346938775510204,
"grad_norm": 0.01858861930668354,
"learning_rate": 1.9002267573696146e-05,
"loss": 0.001,
"step": 72
},
{
"epoch": 0.7448979591836735,
"grad_norm": 0.36321982741355896,
"learning_rate": 1.8956916099773243e-05,
"loss": 0.0128,
"step": 73
},
{
"epoch": 0.7551020408163265,
"grad_norm": 0.20222659409046173,
"learning_rate": 1.8911564625850343e-05,
"loss": 0.0076,
"step": 74
},
{
"epoch": 0.7653061224489796,
"grad_norm": 0.07980707287788391,
"learning_rate": 1.886621315192744e-05,
"loss": 0.0031,
"step": 75
},
{
"epoch": 0.7755102040816326,
"grad_norm": 0.020555464550852776,
"learning_rate": 1.8820861678004536e-05,
"loss": 0.0012,
"step": 76
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.02769128419458866,
"learning_rate": 1.8775510204081636e-05,
"loss": 0.0014,
"step": 77
},
{
"epoch": 0.7959183673469388,
"grad_norm": 0.030886279419064522,
"learning_rate": 1.8730158730158732e-05,
"loss": 0.0015,
"step": 78
},
{
"epoch": 0.8061224489795918,
"grad_norm": 0.04239689186215401,
"learning_rate": 1.868480725623583e-05,
"loss": 0.0017,
"step": 79
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.03217000514268875,
"learning_rate": 1.863945578231293e-05,
"loss": 0.0014,
"step": 80
},
{
"epoch": 0.826530612244898,
"grad_norm": 0.029874522238969803,
"learning_rate": 1.8594104308390025e-05,
"loss": 0.0015,
"step": 81
},
{
"epoch": 0.8367346938775511,
"grad_norm": 0.02627841755747795,
"learning_rate": 1.8548752834467122e-05,
"loss": 0.0013,
"step": 82
},
{
"epoch": 0.8469387755102041,
"grad_norm": 0.019378235563635826,
"learning_rate": 1.8503401360544218e-05,
"loss": 0.001,
"step": 83
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.08690612763166428,
"learning_rate": 1.8458049886621315e-05,
"loss": 0.0021,
"step": 84
},
{
"epoch": 0.8673469387755102,
"grad_norm": 0.014376318082213402,
"learning_rate": 1.8412698412698415e-05,
"loss": 0.0008,
"step": 85
},
{
"epoch": 0.8775510204081632,
"grad_norm": 0.01724099926650524,
"learning_rate": 1.836734693877551e-05,
"loss": 0.0009,
"step": 86
},
{
"epoch": 0.8877551020408163,
"grad_norm": 0.3265489339828491,
"learning_rate": 1.8321995464852608e-05,
"loss": 0.0117,
"step": 87
},
{
"epoch": 0.8979591836734694,
"grad_norm": 0.09740184992551804,
"learning_rate": 1.8276643990929708e-05,
"loss": 0.003,
"step": 88
},
{
"epoch": 0.9081632653061225,
"grad_norm": 0.015478034503757954,
"learning_rate": 1.8231292517006804e-05,
"loss": 0.0008,
"step": 89
},
{
"epoch": 0.9183673469387755,
"grad_norm": 0.18761862814426422,
"learning_rate": 1.81859410430839e-05,
"loss": 0.0068,
"step": 90
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.038408756256103516,
"learning_rate": 1.8140589569161e-05,
"loss": 0.0014,
"step": 91
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.03130817040801048,
"learning_rate": 1.8095238095238097e-05,
"loss": 0.0014,
"step": 92
},
{
"epoch": 0.9489795918367347,
"grad_norm": 0.014020106755197048,
"learning_rate": 1.8049886621315194e-05,
"loss": 0.0007,
"step": 93
},
{
"epoch": 0.9591836734693877,
"grad_norm": 0.02029995806515217,
"learning_rate": 1.8004535147392294e-05,
"loss": 0.0011,
"step": 94
},
{
"epoch": 0.9693877551020408,
"grad_norm": 0.021185798570513725,
"learning_rate": 1.795918367346939e-05,
"loss": 0.0009,
"step": 95
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.014589856378734112,
"learning_rate": 1.7913832199546487e-05,
"loss": 0.0008,
"step": 96
},
{
"epoch": 0.9897959183673469,
"grad_norm": 0.022265039384365082,
"learning_rate": 1.7868480725623583e-05,
"loss": 0.0011,
"step": 97
},
{
"epoch": 1.0,
"grad_norm": 0.0408700592815876,
"learning_rate": 1.782312925170068e-05,
"loss": 0.0011,
"step": 98
},
{
"epoch": 1.010204081632653,
"grad_norm": 0.009026318788528442,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.0005,
"step": 99
},
{
"epoch": 1.0204081632653061,
"grad_norm": 0.007882497273385525,
"learning_rate": 1.7732426303854877e-05,
"loss": 0.0005,
"step": 100
},
{
"epoch": 1.030612244897959,
"grad_norm": 0.025666292756795883,
"learning_rate": 1.7687074829931973e-05,
"loss": 0.0012,
"step": 101
},
{
"epoch": 1.0408163265306123,
"grad_norm": 0.01795661635696888,
"learning_rate": 1.7641723356009073e-05,
"loss": 0.0008,
"step": 102
},
{
"epoch": 1.0510204081632653,
"grad_norm": 0.04071149602532387,
"learning_rate": 1.759637188208617e-05,
"loss": 0.0016,
"step": 103
},
{
"epoch": 1.0612244897959184,
"grad_norm": 0.007932674139738083,
"learning_rate": 1.7551020408163266e-05,
"loss": 0.0005,
"step": 104
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.03695099800825119,
"learning_rate": 1.7505668934240366e-05,
"loss": 0.0015,
"step": 105
},
{
"epoch": 1.0816326530612246,
"grad_norm": 0.008060461841523647,
"learning_rate": 1.7460317460317463e-05,
"loss": 0.0005,
"step": 106
},
{
"epoch": 1.0918367346938775,
"grad_norm": 0.04425932839512825,
"learning_rate": 1.741496598639456e-05,
"loss": 0.0018,
"step": 107
},
{
"epoch": 1.1020408163265305,
"grad_norm": 0.010241498239338398,
"learning_rate": 1.736961451247166e-05,
"loss": 0.0006,
"step": 108
},
{
"epoch": 1.1122448979591837,
"grad_norm": 0.010430874302983284,
"learning_rate": 1.7324263038548756e-05,
"loss": 0.0006,
"step": 109
},
{
"epoch": 1.1224489795918366,
"grad_norm": 0.16115950047969818,
"learning_rate": 1.7278911564625852e-05,
"loss": 0.0043,
"step": 110
},
{
"epoch": 1.1326530612244898,
"grad_norm": 0.018837768584489822,
"learning_rate": 1.723356009070295e-05,
"loss": 0.0007,
"step": 111
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.016730893403291702,
"learning_rate": 1.7188208616780045e-05,
"loss": 0.0009,
"step": 112
},
{
"epoch": 1.153061224489796,
"grad_norm": 0.011841993778944016,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.0007,
"step": 113
},
{
"epoch": 1.163265306122449,
"grad_norm": 0.045097168534994125,
"learning_rate": 1.7097505668934242e-05,
"loss": 0.0019,
"step": 114
},
{
"epoch": 1.1734693877551021,
"grad_norm": 0.09953276813030243,
"learning_rate": 1.705215419501134e-05,
"loss": 0.0032,
"step": 115
},
{
"epoch": 1.183673469387755,
"grad_norm": 0.007014868780970573,
"learning_rate": 1.7006802721088435e-05,
"loss": 0.0004,
"step": 116
},
{
"epoch": 1.193877551020408,
"grad_norm": 0.009045367129147053,
"learning_rate": 1.6961451247165535e-05,
"loss": 0.0005,
"step": 117
},
{
"epoch": 1.2040816326530612,
"grad_norm": 0.007859342731535435,
"learning_rate": 1.691609977324263e-05,
"loss": 0.0005,
"step": 118
},
{
"epoch": 1.2142857142857142,
"grad_norm": 0.032524097710847855,
"learning_rate": 1.687074829931973e-05,
"loss": 0.0009,
"step": 119
},
{
"epoch": 1.2244897959183674,
"grad_norm": 0.05151795968413353,
"learning_rate": 1.6825396825396828e-05,
"loss": 0.0018,
"step": 120
},
{
"epoch": 1.2346938775510203,
"grad_norm": 0.010988794267177582,
"learning_rate": 1.6780045351473924e-05,
"loss": 0.0006,
"step": 121
},
{
"epoch": 1.2448979591836735,
"grad_norm": 0.006904716603457928,
"learning_rate": 1.673469387755102e-05,
"loss": 0.0004,
"step": 122
},
{
"epoch": 1.2551020408163265,
"grad_norm": 0.006797518581151962,
"learning_rate": 1.668934240362812e-05,
"loss": 0.0004,
"step": 123
},
{
"epoch": 1.2653061224489797,
"grad_norm": 0.01896447129547596,
"learning_rate": 1.6643990929705217e-05,
"loss": 0.0008,
"step": 124
},
{
"epoch": 1.2755102040816326,
"grad_norm": 0.01258290559053421,
"learning_rate": 1.6598639455782314e-05,
"loss": 0.0007,
"step": 125
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.015102504752576351,
"learning_rate": 1.655328798185941e-05,
"loss": 0.0006,
"step": 126
},
{
"epoch": 1.2959183673469388,
"grad_norm": 0.005591754335910082,
"learning_rate": 1.6507936507936507e-05,
"loss": 0.0004,
"step": 127
},
{
"epoch": 1.306122448979592,
"grad_norm": 0.10714168101549149,
"learning_rate": 1.6462585034013607e-05,
"loss": 0.0032,
"step": 128
},
{
"epoch": 1.316326530612245,
"grad_norm": 0.030577057972550392,
"learning_rate": 1.6417233560090704e-05,
"loss": 0.0011,
"step": 129
},
{
"epoch": 1.3265306122448979,
"grad_norm": 0.017115091904997826,
"learning_rate": 1.63718820861678e-05,
"loss": 0.0008,
"step": 130
},
{
"epoch": 1.336734693877551,
"grad_norm": 0.011700804345309734,
"learning_rate": 1.63265306122449e-05,
"loss": 0.0006,
"step": 131
},
{
"epoch": 1.346938775510204,
"grad_norm": 0.008858302608132362,
"learning_rate": 1.6281179138321997e-05,
"loss": 0.0004,
"step": 132
},
{
"epoch": 1.3571428571428572,
"grad_norm": 0.00907884445041418,
"learning_rate": 1.6235827664399097e-05,
"loss": 0.0005,
"step": 133
},
{
"epoch": 1.3673469387755102,
"grad_norm": 0.00645515276119113,
"learning_rate": 1.6190476190476193e-05,
"loss": 0.0003,
"step": 134
},
{
"epoch": 1.3775510204081631,
"grad_norm": 0.01644102856516838,
"learning_rate": 1.614512471655329e-05,
"loss": 0.0006,
"step": 135
},
{
"epoch": 1.3877551020408163,
"grad_norm": 0.023088015615940094,
"learning_rate": 1.6099773242630386e-05,
"loss": 0.0009,
"step": 136
},
{
"epoch": 1.3979591836734695,
"grad_norm": 0.004741874989122152,
"learning_rate": 1.6054421768707483e-05,
"loss": 0.0003,
"step": 137
},
{
"epoch": 1.4081632653061225,
"grad_norm": 0.005127857904881239,
"learning_rate": 1.6009070294784583e-05,
"loss": 0.0003,
"step": 138
},
{
"epoch": 1.4183673469387754,
"grad_norm": 0.009942681528627872,
"learning_rate": 1.596371882086168e-05,
"loss": 0.0005,
"step": 139
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.009013745002448559,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.0005,
"step": 140
},
{
"epoch": 1.4387755102040816,
"grad_norm": 0.008382913656532764,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.0005,
"step": 141
},
{
"epoch": 1.4489795918367347,
"grad_norm": 0.01376293320208788,
"learning_rate": 1.5827664399092972e-05,
"loss": 0.0006,
"step": 142
},
{
"epoch": 1.4591836734693877,
"grad_norm": 0.07127456367015839,
"learning_rate": 1.578231292517007e-05,
"loss": 0.0022,
"step": 143
},
{
"epoch": 1.469387755102041,
"grad_norm": 0.006247013341635466,
"learning_rate": 1.5736961451247165e-05,
"loss": 0.0004,
"step": 144
},
{
"epoch": 1.4795918367346939,
"grad_norm": 0.03836556524038315,
"learning_rate": 1.5691609977324265e-05,
"loss": 0.0012,
"step": 145
},
{
"epoch": 1.489795918367347,
"grad_norm": 0.011062193661928177,
"learning_rate": 1.5646258503401362e-05,
"loss": 0.0006,
"step": 146
},
{
"epoch": 1.5,
"grad_norm": 0.005953874904662371,
"learning_rate": 1.5600907029478462e-05,
"loss": 0.0003,
"step": 147
},
{
"epoch": 1.510204081632653,
"grad_norm": 0.014096422120928764,
"learning_rate": 1.555555555555556e-05,
"loss": 0.0008,
"step": 148
},
{
"epoch": 1.5204081632653061,
"grad_norm": 0.0064276340417563915,
"learning_rate": 1.5510204081632655e-05,
"loss": 0.0004,
"step": 149
},
{
"epoch": 1.5306122448979593,
"grad_norm": 0.007169738411903381,
"learning_rate": 1.546485260770975e-05,
"loss": 0.0004,
"step": 150
},
{
"epoch": 1.5408163265306123,
"grad_norm": 0.005434677470475435,
"learning_rate": 1.5419501133786848e-05,
"loss": 0.0004,
"step": 151
},
{
"epoch": 1.5510204081632653,
"grad_norm": 0.008770623244345188,
"learning_rate": 1.5374149659863945e-05,
"loss": 0.0004,
"step": 152
},
{
"epoch": 1.5612244897959182,
"grad_norm": 0.011159502901136875,
"learning_rate": 1.5328798185941044e-05,
"loss": 0.0007,
"step": 153
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.10471421480178833,
"learning_rate": 1.528344671201814e-05,
"loss": 0.0022,
"step": 154
},
{
"epoch": 1.5816326530612246,
"grad_norm": 0.008612933568656445,
"learning_rate": 1.523809523809524e-05,
"loss": 0.0005,
"step": 155
},
{
"epoch": 1.5918367346938775,
"grad_norm": 0.006375262048095465,
"learning_rate": 1.5192743764172338e-05,
"loss": 0.0003,
"step": 156
},
{
"epoch": 1.6020408163265305,
"grad_norm": 0.00903844740241766,
"learning_rate": 1.5147392290249434e-05,
"loss": 0.0005,
"step": 157
},
{
"epoch": 1.6122448979591837,
"grad_norm": 0.005267101805657148,
"learning_rate": 1.510204081632653e-05,
"loss": 0.0003,
"step": 158
},
{
"epoch": 1.6224489795918369,
"grad_norm": 0.006081985309720039,
"learning_rate": 1.505668934240363e-05,
"loss": 0.0004,
"step": 159
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.0072037833742797375,
"learning_rate": 1.5011337868480727e-05,
"loss": 0.0004,
"step": 160
},
{
"epoch": 1.6428571428571428,
"grad_norm": 0.0033731532748788595,
"learning_rate": 1.4965986394557825e-05,
"loss": 0.0002,
"step": 161
},
{
"epoch": 1.6530612244897958,
"grad_norm": 0.0077390824444592,
"learning_rate": 1.4920634920634922e-05,
"loss": 0.0005,
"step": 162
},
{
"epoch": 1.663265306122449,
"grad_norm": 0.009692452847957611,
"learning_rate": 1.4875283446712018e-05,
"loss": 0.0005,
"step": 163
},
{
"epoch": 1.6734693877551021,
"grad_norm": 0.006450532004237175,
"learning_rate": 1.4829931972789118e-05,
"loss": 0.0003,
"step": 164
},
{
"epoch": 1.683673469387755,
"grad_norm": 0.009719816036522388,
"learning_rate": 1.4784580498866215e-05,
"loss": 0.0005,
"step": 165
},
{
"epoch": 1.693877551020408,
"grad_norm": 0.010457034222781658,
"learning_rate": 1.4739229024943311e-05,
"loss": 0.0005,
"step": 166
},
{
"epoch": 1.7040816326530612,
"grad_norm": 0.008015105500817299,
"learning_rate": 1.469387755102041e-05,
"loss": 0.0004,
"step": 167
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.006307144183665514,
"learning_rate": 1.4648526077097506e-05,
"loss": 0.0003,
"step": 168
},
{
"epoch": 1.7244897959183674,
"grad_norm": 0.005334992427378893,
"learning_rate": 1.4603174603174603e-05,
"loss": 0.0003,
"step": 169
},
{
"epoch": 1.7346938775510203,
"grad_norm": 0.008067265152931213,
"learning_rate": 1.4557823129251703e-05,
"loss": 0.0003,
"step": 170
},
{
"epoch": 1.7448979591836735,
"grad_norm": 0.012681787833571434,
"learning_rate": 1.45124716553288e-05,
"loss": 0.0005,
"step": 171
},
{
"epoch": 1.7551020408163265,
"grad_norm": 0.01536930724978447,
"learning_rate": 1.4467120181405896e-05,
"loss": 0.0005,
"step": 172
},
{
"epoch": 1.7653061224489797,
"grad_norm": 0.0037332891952246428,
"learning_rate": 1.4421768707482994e-05,
"loss": 0.0002,
"step": 173
},
{
"epoch": 1.7755102040816326,
"grad_norm": 0.010341755114495754,
"learning_rate": 1.4376417233560092e-05,
"loss": 0.0005,
"step": 174
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.0045587471686303616,
"learning_rate": 1.433106575963719e-05,
"loss": 0.0003,
"step": 175
},
{
"epoch": 1.7959183673469388,
"grad_norm": 0.016639186069369316,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.0006,
"step": 176
},
{
"epoch": 1.806122448979592,
"grad_norm": 0.005003046710044146,
"learning_rate": 1.4240362811791384e-05,
"loss": 0.0003,
"step": 177
},
{
"epoch": 1.816326530612245,
"grad_norm": 0.009210484102368355,
"learning_rate": 1.4195011337868484e-05,
"loss": 0.0004,
"step": 178
},
{
"epoch": 1.8265306122448979,
"grad_norm": 0.007876208052039146,
"learning_rate": 1.414965986394558e-05,
"loss": 0.0004,
"step": 179
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.0038002703804522753,
"learning_rate": 1.4104308390022677e-05,
"loss": 0.0002,
"step": 180
},
{
"epoch": 1.8469387755102042,
"grad_norm": 0.00423433817923069,
"learning_rate": 1.4058956916099775e-05,
"loss": 0.0002,
"step": 181
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.008944015018641949,
"learning_rate": 1.4013605442176872e-05,
"loss": 0.0005,
"step": 182
},
{
"epoch": 1.8673469387755102,
"grad_norm": 0.004832221195101738,
"learning_rate": 1.3968253968253968e-05,
"loss": 0.0003,
"step": 183
},
{
"epoch": 1.8775510204081631,
"grad_norm": 0.005358612630516291,
"learning_rate": 1.3922902494331068e-05,
"loss": 0.0003,
"step": 184
},
{
"epoch": 1.8877551020408163,
"grad_norm": 0.004266591276973486,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.0002,
"step": 185
},
{
"epoch": 1.8979591836734695,
"grad_norm": 0.004511923063546419,
"learning_rate": 1.3832199546485261e-05,
"loss": 0.0003,
"step": 186
},
{
"epoch": 1.9081632653061225,
"grad_norm": 0.12353862076997757,
"learning_rate": 1.378684807256236e-05,
"loss": 0.0032,
"step": 187
},
{
"epoch": 1.9183673469387754,
"grad_norm": 0.009472350589931011,
"learning_rate": 1.3741496598639456e-05,
"loss": 0.0006,
"step": 188
},
{
"epoch": 1.9285714285714286,
"grad_norm": 0.005253692157566547,
"learning_rate": 1.3696145124716554e-05,
"loss": 0.0003,
"step": 189
},
{
"epoch": 1.9387755102040818,
"grad_norm": 0.01199701614677906,
"learning_rate": 1.3650793650793652e-05,
"loss": 0.0005,
"step": 190
},
{
"epoch": 1.9489795918367347,
"grad_norm": 0.006006367038935423,
"learning_rate": 1.3605442176870749e-05,
"loss": 0.0003,
"step": 191
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.010423636995255947,
"learning_rate": 1.3560090702947847e-05,
"loss": 0.0004,
"step": 192
},
{
"epoch": 1.9693877551020407,
"grad_norm": 0.006484678015112877,
"learning_rate": 1.3514739229024945e-05,
"loss": 0.0004,
"step": 193
},
{
"epoch": 1.9795918367346939,
"grad_norm": 0.007823942229151726,
"learning_rate": 1.3469387755102042e-05,
"loss": 0.0004,
"step": 194
},
{
"epoch": 1.989795918367347,
"grad_norm": 0.005013170652091503,
"learning_rate": 1.342403628117914e-05,
"loss": 0.0003,
"step": 195
},
{
"epoch": 2.0,
"grad_norm": 0.003286719787865877,
"learning_rate": 1.3378684807256237e-05,
"loss": 0.0001,
"step": 196
},
{
"epoch": 2.010204081632653,
"grad_norm": 0.007182662840932608,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0003,
"step": 197
},
{
"epoch": 2.020408163265306,
"grad_norm": 0.007124132476747036,
"learning_rate": 1.3287981859410433e-05,
"loss": 0.0003,
"step": 198
},
{
"epoch": 2.0306122448979593,
"grad_norm": 0.0033109758514910936,
"learning_rate": 1.324263038548753e-05,
"loss": 0.0002,
"step": 199
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.0030752080492675304,
"learning_rate": 1.3197278911564626e-05,
"loss": 0.0002,
"step": 200
},
{
"epoch": 2.0510204081632653,
"grad_norm": 0.003937916364520788,
"learning_rate": 1.3151927437641725e-05,
"loss": 0.0003,
"step": 201
},
{
"epoch": 2.061224489795918,
"grad_norm": 0.003902744734659791,
"learning_rate": 1.3106575963718821e-05,
"loss": 0.0002,
"step": 202
},
{
"epoch": 2.0714285714285716,
"grad_norm": 0.003365420503541827,
"learning_rate": 1.3061224489795918e-05,
"loss": 0.0002,
"step": 203
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.005177025683224201,
"learning_rate": 1.3015873015873018e-05,
"loss": 0.0003,
"step": 204
},
{
"epoch": 2.0918367346938775,
"grad_norm": 0.004482835531234741,
"learning_rate": 1.2970521541950114e-05,
"loss": 0.0003,
"step": 205
},
{
"epoch": 2.1020408163265305,
"grad_norm": 0.03137246519327164,
"learning_rate": 1.2925170068027212e-05,
"loss": 0.0008,
"step": 206
},
{
"epoch": 2.1122448979591835,
"grad_norm": 0.0061664879322052,
"learning_rate": 1.2879818594104309e-05,
"loss": 0.0004,
"step": 207
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.007974425330758095,
"learning_rate": 1.2834467120181407e-05,
"loss": 0.0004,
"step": 208
},
{
"epoch": 2.13265306122449,
"grad_norm": 0.008234084583818913,
"learning_rate": 1.2789115646258505e-05,
"loss": 0.0004,
"step": 209
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.005270775873214006,
"learning_rate": 1.2743764172335602e-05,
"loss": 0.0003,
"step": 210
},
{
"epoch": 2.1530612244897958,
"grad_norm": 0.008359711617231369,
"learning_rate": 1.2698412698412699e-05,
"loss": 0.0004,
"step": 211
},
{
"epoch": 2.163265306122449,
"grad_norm": 0.004007325973361731,
"learning_rate": 1.2653061224489798e-05,
"loss": 0.0002,
"step": 212
},
{
"epoch": 2.173469387755102,
"grad_norm": 0.004752746783196926,
"learning_rate": 1.2607709750566895e-05,
"loss": 0.0002,
"step": 213
},
{
"epoch": 2.183673469387755,
"grad_norm": 0.0031563639640808105,
"learning_rate": 1.2562358276643992e-05,
"loss": 0.0002,
"step": 214
},
{
"epoch": 2.193877551020408,
"grad_norm": 0.003636228386312723,
"learning_rate": 1.251700680272109e-05,
"loss": 0.0002,
"step": 215
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.0034094173461198807,
"learning_rate": 1.2471655328798186e-05,
"loss": 0.0002,
"step": 216
},
{
"epoch": 2.2142857142857144,
"grad_norm": 0.004791253712028265,
"learning_rate": 1.2426303854875283e-05,
"loss": 0.0003,
"step": 217
},
{
"epoch": 2.2244897959183674,
"grad_norm": 0.010279831476509571,
"learning_rate": 1.2380952380952383e-05,
"loss": 0.0004,
"step": 218
},
{
"epoch": 2.2346938775510203,
"grad_norm": 0.006269859150052071,
"learning_rate": 1.233560090702948e-05,
"loss": 0.0003,
"step": 219
},
{
"epoch": 2.2448979591836733,
"grad_norm": 0.003878034185618162,
"learning_rate": 1.2290249433106578e-05,
"loss": 0.0002,
"step": 220
},
{
"epoch": 2.2551020408163267,
"grad_norm": 0.0031356397084891796,
"learning_rate": 1.2244897959183674e-05,
"loss": 0.0002,
"step": 221
},
{
"epoch": 2.2653061224489797,
"grad_norm": 0.004956800024956465,
"learning_rate": 1.219954648526077e-05,
"loss": 0.0003,
"step": 222
},
{
"epoch": 2.2755102040816326,
"grad_norm": 0.0036491460632532835,
"learning_rate": 1.215419501133787e-05,
"loss": 0.0002,
"step": 223
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.005171376280486584,
"learning_rate": 1.2108843537414967e-05,
"loss": 0.0003,
"step": 224
},
{
"epoch": 2.295918367346939,
"grad_norm": 0.0029648093041032553,
"learning_rate": 1.2063492063492064e-05,
"loss": 0.0002,
"step": 225
},
{
"epoch": 2.306122448979592,
"grad_norm": 0.006329487543553114,
"learning_rate": 1.2018140589569162e-05,
"loss": 0.0003,
"step": 226
},
{
"epoch": 2.316326530612245,
"grad_norm": 0.0031556261237710714,
"learning_rate": 1.197278911564626e-05,
"loss": 0.0002,
"step": 227
},
{
"epoch": 2.326530612244898,
"grad_norm": 0.009794807992875576,
"learning_rate": 1.1927437641723357e-05,
"loss": 0.0004,
"step": 228
},
{
"epoch": 2.336734693877551,
"grad_norm": 0.003714526304975152,
"learning_rate": 1.1882086167800455e-05,
"loss": 0.0002,
"step": 229
},
{
"epoch": 2.3469387755102042,
"grad_norm": 0.0031528149265795946,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.0002,
"step": 230
},
{
"epoch": 2.357142857142857,
"grad_norm": 0.024612465873360634,
"learning_rate": 1.1791383219954648e-05,
"loss": 0.001,
"step": 231
},
{
"epoch": 2.36734693877551,
"grad_norm": 0.00424389261752367,
"learning_rate": 1.1746031746031748e-05,
"loss": 0.0002,
"step": 232
},
{
"epoch": 2.377551020408163,
"grad_norm": 0.01282750815153122,
"learning_rate": 1.1700680272108845e-05,
"loss": 0.0006,
"step": 233
},
{
"epoch": 2.387755102040816,
"grad_norm": 0.006169433705508709,
"learning_rate": 1.1655328798185943e-05,
"loss": 0.0003,
"step": 234
},
{
"epoch": 2.3979591836734695,
"grad_norm": 0.005562425125390291,
"learning_rate": 1.160997732426304e-05,
"loss": 0.0003,
"step": 235
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.01002059318125248,
"learning_rate": 1.1564625850340136e-05,
"loss": 0.0005,
"step": 236
},
{
"epoch": 2.4183673469387754,
"grad_norm": 0.007645392790436745,
"learning_rate": 1.1519274376417236e-05,
"loss": 0.0004,
"step": 237
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.03314538300037384,
"learning_rate": 1.1473922902494332e-05,
"loss": 0.0011,
"step": 238
},
{
"epoch": 2.438775510204082,
"grad_norm": 0.04838201776146889,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.0009,
"step": 239
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.006126615218818188,
"learning_rate": 1.1383219954648527e-05,
"loss": 0.0004,
"step": 240
},
{
"epoch": 2.4591836734693877,
"grad_norm": 0.005240059457719326,
"learning_rate": 1.1337868480725624e-05,
"loss": 0.0003,
"step": 241
},
{
"epoch": 2.4693877551020407,
"grad_norm": 0.006122751161456108,
"learning_rate": 1.1292517006802722e-05,
"loss": 0.0003,
"step": 242
},
{
"epoch": 2.479591836734694,
"grad_norm": 0.0024781699758023024,
"learning_rate": 1.124716553287982e-05,
"loss": 0.0002,
"step": 243
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.00649678660556674,
"learning_rate": 1.1201814058956917e-05,
"loss": 0.0004,
"step": 244
},
{
"epoch": 2.5,
"grad_norm": 0.003478443017229438,
"learning_rate": 1.1156462585034013e-05,
"loss": 0.0002,
"step": 245
},
{
"epoch": 2.510204081632653,
"grad_norm": 0.003858257783576846,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.0002,
"step": 246
},
{
"epoch": 2.520408163265306,
"grad_norm": 0.006577960215508938,
"learning_rate": 1.106575963718821e-05,
"loss": 0.0004,
"step": 247
},
{
"epoch": 2.5306122448979593,
"grad_norm": 0.004543396644294262,
"learning_rate": 1.1020408163265306e-05,
"loss": 0.0003,
"step": 248
},
{
"epoch": 2.5408163265306123,
"grad_norm": 0.0032837800681591034,
"learning_rate": 1.0975056689342405e-05,
"loss": 0.0002,
"step": 249
},
{
"epoch": 2.5510204081632653,
"grad_norm": 0.012741784565150738,
"learning_rate": 1.0929705215419501e-05,
"loss": 0.0006,
"step": 250
},
{
"epoch": 2.561224489795918,
"grad_norm": 0.002741026459261775,
"learning_rate": 1.0884353741496601e-05,
"loss": 0.0002,
"step": 251
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.0027930692303925753,
"learning_rate": 1.0839002267573698e-05,
"loss": 0.0002,
"step": 252
},
{
"epoch": 2.5816326530612246,
"grad_norm": 0.003156407503411174,
"learning_rate": 1.0793650793650794e-05,
"loss": 0.0002,
"step": 253
},
{
"epoch": 2.5918367346938775,
"grad_norm": 0.0036412908229976892,
"learning_rate": 1.0748299319727893e-05,
"loss": 0.0002,
"step": 254
},
{
"epoch": 2.6020408163265305,
"grad_norm": 0.04034988954663277,
"learning_rate": 1.0702947845804989e-05,
"loss": 0.0013,
"step": 255
},
{
"epoch": 2.612244897959184,
"grad_norm": 0.0034895159769803286,
"learning_rate": 1.0657596371882086e-05,
"loss": 0.0002,
"step": 256
},
{
"epoch": 2.622448979591837,
"grad_norm": 0.04325950890779495,
"learning_rate": 1.0612244897959186e-05,
"loss": 0.0012,
"step": 257
},
{
"epoch": 2.63265306122449,
"grad_norm": 0.004671269562095404,
"learning_rate": 1.0566893424036282e-05,
"loss": 0.0003,
"step": 258
},
{
"epoch": 2.642857142857143,
"grad_norm": 0.002534637926146388,
"learning_rate": 1.0521541950113379e-05,
"loss": 0.0002,
"step": 259
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.004457306116819382,
"learning_rate": 1.0476190476190477e-05,
"loss": 0.0003,
"step": 260
},
{
"epoch": 2.663265306122449,
"grad_norm": 0.004050545394420624,
"learning_rate": 1.0430839002267575e-05,
"loss": 0.0002,
"step": 261
},
{
"epoch": 2.673469387755102,
"grad_norm": 0.04582836106419563,
"learning_rate": 1.0385487528344672e-05,
"loss": 0.0011,
"step": 262
},
{
"epoch": 2.683673469387755,
"grad_norm": 0.004835136700421572,
"learning_rate": 1.034013605442177e-05,
"loss": 0.0003,
"step": 263
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.008025884628295898,
"learning_rate": 1.0294784580498866e-05,
"loss": 0.0003,
"step": 264
},
{
"epoch": 2.704081632653061,
"grad_norm": 0.007876653224229813,
"learning_rate": 1.0249433106575966e-05,
"loss": 0.0004,
"step": 265
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.006527318619191647,
"learning_rate": 1.0204081632653063e-05,
"loss": 0.0003,
"step": 266
},
{
"epoch": 2.7244897959183674,
"grad_norm": 0.002143925055861473,
"learning_rate": 1.015873015873016e-05,
"loss": 0.0001,
"step": 267
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.003183850785717368,
"learning_rate": 1.0113378684807258e-05,
"loss": 0.0002,
"step": 268
},
{
"epoch": 2.7448979591836737,
"grad_norm": 0.003816920565441251,
"learning_rate": 1.0068027210884354e-05,
"loss": 0.0002,
"step": 269
},
{
"epoch": 2.7551020408163263,
"grad_norm": 0.005489765666425228,
"learning_rate": 1.0022675736961451e-05,
"loss": 0.0003,
"step": 270
},
{
"epoch": 2.7653061224489797,
"grad_norm": 0.002469045575708151,
"learning_rate": 9.977324263038549e-06,
"loss": 0.0002,
"step": 271
},
{
"epoch": 2.7755102040816326,
"grad_norm": 0.0031796926632523537,
"learning_rate": 9.931972789115647e-06,
"loss": 0.0002,
"step": 272
},
{
"epoch": 2.7857142857142856,
"grad_norm": 0.003679267829284072,
"learning_rate": 9.886621315192746e-06,
"loss": 0.0002,
"step": 273
},
{
"epoch": 2.795918367346939,
"grad_norm": 0.010035431012511253,
"learning_rate": 9.841269841269842e-06,
"loss": 0.0004,
"step": 274
},
{
"epoch": 2.806122448979592,
"grad_norm": 0.0031564754899591208,
"learning_rate": 9.795918367346939e-06,
"loss": 0.0002,
"step": 275
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.005581808276474476,
"learning_rate": 9.750566893424037e-06,
"loss": 0.0003,
"step": 276
},
{
"epoch": 2.826530612244898,
"grad_norm": 0.002813218394294381,
"learning_rate": 9.705215419501135e-06,
"loss": 0.0002,
"step": 277
},
{
"epoch": 2.836734693877551,
"grad_norm": 0.003005703678354621,
"learning_rate": 9.659863945578232e-06,
"loss": 0.0002,
"step": 278
},
{
"epoch": 2.8469387755102042,
"grad_norm": 0.0068191043101251125,
"learning_rate": 9.61451247165533e-06,
"loss": 0.0004,
"step": 279
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.008098878897726536,
"learning_rate": 9.569160997732427e-06,
"loss": 0.0004,
"step": 280
},
{
"epoch": 2.86734693877551,
"grad_norm": 0.014086649753153324,
"learning_rate": 9.523809523809525e-06,
"loss": 0.0004,
"step": 281
},
{
"epoch": 2.877551020408163,
"grad_norm": 0.004192018415778875,
"learning_rate": 9.478458049886621e-06,
"loss": 0.0002,
"step": 282
},
{
"epoch": 2.887755102040816,
"grad_norm": 0.0025980896316468716,
"learning_rate": 9.43310657596372e-06,
"loss": 0.0002,
"step": 283
},
{
"epoch": 2.8979591836734695,
"grad_norm": 0.010852901265025139,
"learning_rate": 9.387755102040818e-06,
"loss": 0.0004,
"step": 284
},
{
"epoch": 2.9081632653061225,
"grad_norm": 0.003038214286789298,
"learning_rate": 9.342403628117914e-06,
"loss": 0.0002,
"step": 285
},
{
"epoch": 2.9183673469387754,
"grad_norm": 0.0038609837647527456,
"learning_rate": 9.297052154195013e-06,
"loss": 0.0002,
"step": 286
},
{
"epoch": 2.928571428571429,
"grad_norm": 0.0073219058103859425,
"learning_rate": 9.251700680272109e-06,
"loss": 0.0004,
"step": 287
},
{
"epoch": 2.938775510204082,
"grad_norm": 0.004274952691048384,
"learning_rate": 9.206349206349207e-06,
"loss": 0.0003,
"step": 288
},
{
"epoch": 2.9489795918367347,
"grad_norm": 0.003952549304813147,
"learning_rate": 9.160997732426304e-06,
"loss": 0.0002,
"step": 289
},
{
"epoch": 2.9591836734693877,
"grad_norm": 0.03005502186715603,
"learning_rate": 9.115646258503402e-06,
"loss": 0.0006,
"step": 290
},
{
"epoch": 2.9693877551020407,
"grad_norm": 0.0023858973290771246,
"learning_rate": 9.0702947845805e-06,
"loss": 0.0002,
"step": 291
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.004339877981692553,
"learning_rate": 9.024943310657597e-06,
"loss": 0.0003,
"step": 292
},
{
"epoch": 2.989795918367347,
"grad_norm": 0.006001957226544619,
"learning_rate": 8.979591836734695e-06,
"loss": 0.0003,
"step": 293
},
{
"epoch": 3.0,
"grad_norm": 0.007597202900797129,
"learning_rate": 8.934240362811792e-06,
"loss": 0.0002,
"step": 294
},
{
"epoch": 3.010204081632653,
"grad_norm": 0.003272986738011241,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0002,
"step": 295
},
{
"epoch": 3.020408163265306,
"grad_norm": 0.0023373092990368605,
"learning_rate": 8.843537414965987e-06,
"loss": 0.0001,
"step": 296
},
{
"epoch": 3.0306122448979593,
"grad_norm": 0.0037628381978720427,
"learning_rate": 8.798185941043085e-06,
"loss": 0.0002,
"step": 297
},
{
"epoch": 3.0408163265306123,
"grad_norm": 0.011344632133841515,
"learning_rate": 8.752834467120183e-06,
"loss": 0.0005,
"step": 298
},
{
"epoch": 3.0510204081632653,
"grad_norm": 0.009169838391244411,
"learning_rate": 8.70748299319728e-06,
"loss": 0.0004,
"step": 299
},
{
"epoch": 3.061224489795918,
"grad_norm": 0.019571438431739807,
"learning_rate": 8.662131519274378e-06,
"loss": 0.0005,
"step": 300
},
{
"epoch": 3.0714285714285716,
"grad_norm": 0.0035050984006375074,
"learning_rate": 8.616780045351474e-06,
"loss": 0.0002,
"step": 301
},
{
"epoch": 3.0816326530612246,
"grad_norm": 0.004051654599606991,
"learning_rate": 8.571428571428571e-06,
"loss": 0.0002,
"step": 302
},
{
"epoch": 3.0918367346938775,
"grad_norm": 0.002926639514043927,
"learning_rate": 8.52607709750567e-06,
"loss": 0.0002,
"step": 303
},
{
"epoch": 3.1020408163265305,
"grad_norm": 0.013055351562798023,
"learning_rate": 8.480725623582767e-06,
"loss": 0.0004,
"step": 304
},
{
"epoch": 3.1122448979591835,
"grad_norm": 0.004692048765718937,
"learning_rate": 8.435374149659866e-06,
"loss": 0.0002,
"step": 305
},
{
"epoch": 3.122448979591837,
"grad_norm": 0.0025202229153364897,
"learning_rate": 8.390022675736962e-06,
"loss": 0.0002,
"step": 306
},
{
"epoch": 3.13265306122449,
"grad_norm": 0.005598872900009155,
"learning_rate": 8.34467120181406e-06,
"loss": 0.0002,
"step": 307
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.0032469748985022306,
"learning_rate": 8.299319727891157e-06,
"loss": 0.0002,
"step": 308
},
{
"epoch": 3.1530612244897958,
"grad_norm": 0.004803687799721956,
"learning_rate": 8.253968253968254e-06,
"loss": 0.0003,
"step": 309
},
{
"epoch": 3.163265306122449,
"grad_norm": 0.0046676271595060825,
"learning_rate": 8.208616780045352e-06,
"loss": 0.0003,
"step": 310
},
{
"epoch": 3.173469387755102,
"grad_norm": 0.002468443475663662,
"learning_rate": 8.16326530612245e-06,
"loss": 0.0002,
"step": 311
},
{
"epoch": 3.183673469387755,
"grad_norm": 0.006342902779579163,
"learning_rate": 8.117913832199548e-06,
"loss": 0.0004,
"step": 312
},
{
"epoch": 3.193877551020408,
"grad_norm": 0.0023443913087248802,
"learning_rate": 8.072562358276645e-06,
"loss": 0.0002,
"step": 313
},
{
"epoch": 3.204081632653061,
"grad_norm": 0.0020017994102090597,
"learning_rate": 8.027210884353741e-06,
"loss": 0.0001,
"step": 314
},
{
"epoch": 3.2142857142857144,
"grad_norm": 0.0026365304365754128,
"learning_rate": 7.98185941043084e-06,
"loss": 0.0002,
"step": 315
},
{
"epoch": 3.2244897959183674,
"grad_norm": 0.0056705656461417675,
"learning_rate": 7.936507936507936e-06,
"loss": 0.0004,
"step": 316
},
{
"epoch": 3.2346938775510203,
"grad_norm": 0.009689562022686005,
"learning_rate": 7.891156462585034e-06,
"loss": 0.0004,
"step": 317
},
{
"epoch": 3.2448979591836733,
"grad_norm": 0.008008199743926525,
"learning_rate": 7.845804988662133e-06,
"loss": 0.0003,
"step": 318
},
{
"epoch": 3.2551020408163267,
"grad_norm": 0.0026869464199990034,
"learning_rate": 7.800453514739231e-06,
"loss": 0.0002,
"step": 319
},
{
"epoch": 3.2653061224489797,
"grad_norm": 0.0031625712290406227,
"learning_rate": 7.755102040816327e-06,
"loss": 0.0002,
"step": 320
},
{
"epoch": 3.2755102040816326,
"grad_norm": 0.0028163609094917774,
"learning_rate": 7.709750566893424e-06,
"loss": 0.0002,
"step": 321
},
{
"epoch": 3.2857142857142856,
"grad_norm": 0.005660755559802055,
"learning_rate": 7.664399092970522e-06,
"loss": 0.0003,
"step": 322
},
{
"epoch": 3.295918367346939,
"grad_norm": 0.0039995694532990456,
"learning_rate": 7.61904761904762e-06,
"loss": 0.0003,
"step": 323
},
{
"epoch": 3.306122448979592,
"grad_norm": 0.005670357029885054,
"learning_rate": 7.573696145124717e-06,
"loss": 0.0003,
"step": 324
},
{
"epoch": 3.316326530612245,
"grad_norm": 0.0025813328102231026,
"learning_rate": 7.528344671201815e-06,
"loss": 0.0002,
"step": 325
},
{
"epoch": 3.326530612244898,
"grad_norm": 0.0030447246972471476,
"learning_rate": 7.482993197278913e-06,
"loss": 0.0002,
"step": 326
},
{
"epoch": 3.336734693877551,
"grad_norm": 0.0019142641685903072,
"learning_rate": 7.437641723356009e-06,
"loss": 0.0001,
"step": 327
},
{
"epoch": 3.3469387755102042,
"grad_norm": 0.004176548682153225,
"learning_rate": 7.3922902494331075e-06,
"loss": 0.0002,
"step": 328
},
{
"epoch": 3.357142857142857,
"grad_norm": 0.009249200113117695,
"learning_rate": 7.346938775510205e-06,
"loss": 0.0004,
"step": 329
},
{
"epoch": 3.36734693877551,
"grad_norm": 0.005048077553510666,
"learning_rate": 7.301587301587301e-06,
"loss": 0.0002,
"step": 330
},
{
"epoch": 3.377551020408163,
"grad_norm": 0.0024696551263332367,
"learning_rate": 7.2562358276644e-06,
"loss": 0.0002,
"step": 331
},
{
"epoch": 3.387755102040816,
"grad_norm": 0.002270912518724799,
"learning_rate": 7.210884353741497e-06,
"loss": 0.0002,
"step": 332
},
{
"epoch": 3.3979591836734695,
"grad_norm": 0.0020916208159178495,
"learning_rate": 7.165532879818595e-06,
"loss": 0.0001,
"step": 333
},
{
"epoch": 3.4081632653061225,
"grad_norm": 0.002925699343904853,
"learning_rate": 7.120181405895692e-06,
"loss": 0.0002,
"step": 334
},
{
"epoch": 3.4183673469387754,
"grad_norm": 0.003520503407344222,
"learning_rate": 7.07482993197279e-06,
"loss": 0.0002,
"step": 335
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.0024117021821439266,
"learning_rate": 7.0294784580498875e-06,
"loss": 0.0001,
"step": 336
},
{
"epoch": 3.438775510204082,
"grad_norm": 0.011653084307909012,
"learning_rate": 6.984126984126984e-06,
"loss": 0.0005,
"step": 337
},
{
"epoch": 3.4489795918367347,
"grad_norm": 0.0021838736720383167,
"learning_rate": 6.938775510204082e-06,
"loss": 0.0001,
"step": 338
},
{
"epoch": 3.4591836734693877,
"grad_norm": 0.005142625421285629,
"learning_rate": 6.89342403628118e-06,
"loss": 0.0003,
"step": 339
},
{
"epoch": 3.4693877551020407,
"grad_norm": 0.009561867453157902,
"learning_rate": 6.848072562358277e-06,
"loss": 0.0003,
"step": 340
},
{
"epoch": 3.479591836734694,
"grad_norm": 0.0029086614958941936,
"learning_rate": 6.8027210884353745e-06,
"loss": 0.0002,
"step": 341
},
{
"epoch": 3.489795918367347,
"grad_norm": 0.004574024584144354,
"learning_rate": 6.757369614512473e-06,
"loss": 0.0002,
"step": 342
},
{
"epoch": 3.5,
"grad_norm": 0.002138937823474407,
"learning_rate": 6.71201814058957e-06,
"loss": 0.0001,
"step": 343
},
{
"epoch": 3.510204081632653,
"grad_norm": 0.003517791396006942,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0002,
"step": 344
},
{
"epoch": 3.520408163265306,
"grad_norm": 0.03443054482340813,
"learning_rate": 6.621315192743765e-06,
"loss": 0.0008,
"step": 345
},
{
"epoch": 3.5306122448979593,
"grad_norm": 0.008042026311159134,
"learning_rate": 6.575963718820862e-06,
"loss": 0.0002,
"step": 346
},
{
"epoch": 3.5408163265306123,
"grad_norm": 0.0047872308641672134,
"learning_rate": 6.530612244897959e-06,
"loss": 0.0003,
"step": 347
},
{
"epoch": 3.5510204081632653,
"grad_norm": 0.010120042599737644,
"learning_rate": 6.485260770975057e-06,
"loss": 0.0003,
"step": 348
},
{
"epoch": 3.561224489795918,
"grad_norm": 0.004412388429045677,
"learning_rate": 6.4399092970521545e-06,
"loss": 0.0003,
"step": 349
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.003939002752304077,
"learning_rate": 6.394557823129253e-06,
"loss": 0.0002,
"step": 350
},
{
"epoch": 3.5816326530612246,
"grad_norm": 0.003072823630645871,
"learning_rate": 6.349206349206349e-06,
"loss": 0.0002,
"step": 351
},
{
"epoch": 3.5918367346938775,
"grad_norm": 0.002594695193693042,
"learning_rate": 6.3038548752834475e-06,
"loss": 0.0002,
"step": 352
},
{
"epoch": 3.6020408163265305,
"grad_norm": 0.0021781930699944496,
"learning_rate": 6.258503401360545e-06,
"loss": 0.0001,
"step": 353
},
{
"epoch": 3.612244897959184,
"grad_norm": 0.004957903642207384,
"learning_rate": 6.2131519274376415e-06,
"loss": 0.0002,
"step": 354
},
{
"epoch": 3.622448979591837,
"grad_norm": 0.0020664699841290712,
"learning_rate": 6.16780045351474e-06,
"loss": 0.0001,
"step": 355
},
{
"epoch": 3.63265306122449,
"grad_norm": 0.00455419672653079,
"learning_rate": 6.122448979591837e-06,
"loss": 0.0002,
"step": 356
},
{
"epoch": 3.642857142857143,
"grad_norm": 0.001891249674372375,
"learning_rate": 6.077097505668935e-06,
"loss": 0.0001,
"step": 357
},
{
"epoch": 3.6530612244897958,
"grad_norm": 0.0015174165600910783,
"learning_rate": 6.031746031746032e-06,
"loss": 0.0001,
"step": 358
},
{
"epoch": 3.663265306122449,
"grad_norm": 0.008895975537598133,
"learning_rate": 5.98639455782313e-06,
"loss": 0.0003,
"step": 359
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.010570279322564602,
"learning_rate": 5.9410430839002275e-06,
"loss": 0.0003,
"step": 360
},
{
"epoch": 3.683673469387755,
"grad_norm": 0.005755205638706684,
"learning_rate": 5.895691609977324e-06,
"loss": 0.0002,
"step": 361
},
{
"epoch": 3.693877551020408,
"grad_norm": 0.00319477915763855,
"learning_rate": 5.850340136054422e-06,
"loss": 0.0002,
"step": 362
},
{
"epoch": 3.704081632653061,
"grad_norm": 0.0023295124992728233,
"learning_rate": 5.80498866213152e-06,
"loss": 0.0001,
"step": 363
},
{
"epoch": 3.7142857142857144,
"grad_norm": 0.0038169752806425095,
"learning_rate": 5.759637188208618e-06,
"loss": 0.0003,
"step": 364
},
{
"epoch": 3.7244897959183674,
"grad_norm": 0.007799374870955944,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.0003,
"step": 365
},
{
"epoch": 3.7346938775510203,
"grad_norm": 0.002488058526068926,
"learning_rate": 5.668934240362812e-06,
"loss": 0.0002,
"step": 366
},
{
"epoch": 3.7448979591836737,
"grad_norm": 0.01512609887868166,
"learning_rate": 5.62358276643991e-06,
"loss": 0.0006,
"step": 367
},
{
"epoch": 3.7551020408163263,
"grad_norm": 0.004572188016027212,
"learning_rate": 5.578231292517007e-06,
"loss": 0.0003,
"step": 368
},
{
"epoch": 3.7653061224489797,
"grad_norm": 0.0024051195941865444,
"learning_rate": 5.532879818594105e-06,
"loss": 0.0002,
"step": 369
},
{
"epoch": 3.7755102040816326,
"grad_norm": 0.0032509195152670145,
"learning_rate": 5.487528344671202e-06,
"loss": 0.0002,
"step": 370
},
{
"epoch": 3.7857142857142856,
"grad_norm": 0.0019066549139097333,
"learning_rate": 5.442176870748301e-06,
"loss": 0.0001,
"step": 371
},
{
"epoch": 3.795918367346939,
"grad_norm": 0.004059778060764074,
"learning_rate": 5.396825396825397e-06,
"loss": 0.0002,
"step": 372
},
{
"epoch": 3.806122448979592,
"grad_norm": 0.003823889186605811,
"learning_rate": 5.3514739229024945e-06,
"loss": 0.0002,
"step": 373
},
{
"epoch": 3.816326530612245,
"grad_norm": 0.005696111358702183,
"learning_rate": 5.306122448979593e-06,
"loss": 0.0003,
"step": 374
},
{
"epoch": 3.826530612244898,
"grad_norm": 0.002276304177939892,
"learning_rate": 5.260770975056689e-06,
"loss": 0.0001,
"step": 375
},
{
"epoch": 3.836734693877551,
"grad_norm": 0.003423569956794381,
"learning_rate": 5.2154195011337876e-06,
"loss": 0.0002,
"step": 376
},
{
"epoch": 3.8469387755102042,
"grad_norm": 0.009261609055101871,
"learning_rate": 5.170068027210885e-06,
"loss": 0.0004,
"step": 377
},
{
"epoch": 3.857142857142857,
"grad_norm": 0.0026830616407096386,
"learning_rate": 5.124716553287983e-06,
"loss": 0.0002,
"step": 378
},
{
"epoch": 3.86734693877551,
"grad_norm": 0.007292145863175392,
"learning_rate": 5.07936507936508e-06,
"loss": 0.0003,
"step": 379
},
{
"epoch": 3.877551020408163,
"grad_norm": 0.001463556895032525,
"learning_rate": 5.034013605442177e-06,
"loss": 0.0001,
"step": 380
},
{
"epoch": 3.887755102040816,
"grad_norm": 0.0077773998491466045,
"learning_rate": 4.9886621315192745e-06,
"loss": 0.0003,
"step": 381
},
{
"epoch": 3.8979591836734695,
"grad_norm": 0.001333568710833788,
"learning_rate": 4.943310657596373e-06,
"loss": 0.0001,
"step": 382
},
{
"epoch": 3.9081632653061225,
"grad_norm": 0.0033744387328624725,
"learning_rate": 4.897959183673469e-06,
"loss": 0.0002,
"step": 383
},
{
"epoch": 3.9183673469387754,
"grad_norm": 0.0031404553446918726,
"learning_rate": 4.852607709750568e-06,
"loss": 0.0002,
"step": 384
},
{
"epoch": 3.928571428571429,
"grad_norm": 0.002246819669380784,
"learning_rate": 4.807256235827665e-06,
"loss": 0.0002,
"step": 385
},
{
"epoch": 3.938775510204082,
"grad_norm": 0.006392229348421097,
"learning_rate": 4.761904761904762e-06,
"loss": 0.0003,
"step": 386
},
{
"epoch": 3.9489795918367347,
"grad_norm": 0.002482037292793393,
"learning_rate": 4.71655328798186e-06,
"loss": 0.0002,
"step": 387
},
{
"epoch": 3.9591836734693877,
"grad_norm": 0.0029472103342413902,
"learning_rate": 4.671201814058957e-06,
"loss": 0.0002,
"step": 388
},
{
"epoch": 3.9693877551020407,
"grad_norm": 0.001341557246632874,
"learning_rate": 4.6258503401360546e-06,
"loss": 0.0001,
"step": 389
},
{
"epoch": 3.979591836734694,
"grad_norm": 0.003789098234847188,
"learning_rate": 4.580498866213152e-06,
"loss": 0.0002,
"step": 390
},
{
"epoch": 3.989795918367347,
"grad_norm": 0.0022696068044751883,
"learning_rate": 4.53514739229025e-06,
"loss": 0.0001,
"step": 391
},
{
"epoch": 4.0,
"grad_norm": 0.0036536771804094315,
"learning_rate": 4.489795918367348e-06,
"loss": 0.0001,
"step": 392
},
{
"epoch": 4.010204081632653,
"grad_norm": 0.0017391832079738379,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0001,
"step": 393
},
{
"epoch": 4.020408163265306,
"grad_norm": 0.003093178616836667,
"learning_rate": 4.399092970521542e-06,
"loss": 0.0002,
"step": 394
},
{
"epoch": 4.030612244897959,
"grad_norm": 0.0017501730471849442,
"learning_rate": 4.35374149659864e-06,
"loss": 0.0001,
"step": 395
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.03281351551413536,
"learning_rate": 4.308390022675737e-06,
"loss": 0.0007,
"step": 396
},
{
"epoch": 4.051020408163265,
"grad_norm": 0.002625512657687068,
"learning_rate": 4.263038548752835e-06,
"loss": 0.0002,
"step": 397
},
{
"epoch": 4.061224489795919,
"grad_norm": 0.003758464241400361,
"learning_rate": 4.217687074829933e-06,
"loss": 0.0002,
"step": 398
},
{
"epoch": 4.071428571428571,
"grad_norm": 0.0021065385080873966,
"learning_rate": 4.17233560090703e-06,
"loss": 0.0001,
"step": 399
},
{
"epoch": 4.081632653061225,
"grad_norm": 0.0022317173425108194,
"learning_rate": 4.126984126984127e-06,
"loss": 0.0001,
"step": 400
},
{
"epoch": 4.091836734693878,
"grad_norm": 0.0025758843403309584,
"learning_rate": 4.081632653061225e-06,
"loss": 0.0002,
"step": 401
},
{
"epoch": 4.1020408163265305,
"grad_norm": 0.003262228099629283,
"learning_rate": 4.036281179138322e-06,
"loss": 0.0002,
"step": 402
},
{
"epoch": 4.112244897959184,
"grad_norm": 0.002355805365368724,
"learning_rate": 3.99092970521542e-06,
"loss": 0.0001,
"step": 403
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.002239174908027053,
"learning_rate": 3.945578231292517e-06,
"loss": 0.0001,
"step": 404
},
{
"epoch": 4.13265306122449,
"grad_norm": 0.003173491917550564,
"learning_rate": 3.9002267573696154e-06,
"loss": 0.0002,
"step": 405
},
{
"epoch": 4.142857142857143,
"grad_norm": 0.009472887963056564,
"learning_rate": 3.854875283446712e-06,
"loss": 0.0004,
"step": 406
},
{
"epoch": 4.153061224489796,
"grad_norm": 0.010682443156838417,
"learning_rate": 3.80952380952381e-06,
"loss": 0.0004,
"step": 407
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.01789182610809803,
"learning_rate": 3.7641723356009076e-06,
"loss": 0.0006,
"step": 408
},
{
"epoch": 4.173469387755102,
"grad_norm": 0.002530967351049185,
"learning_rate": 3.7188208616780046e-06,
"loss": 0.0001,
"step": 409
},
{
"epoch": 4.183673469387755,
"grad_norm": 0.0029371839482337236,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.0002,
"step": 410
},
{
"epoch": 4.1938775510204085,
"grad_norm": 0.004367890767753124,
"learning_rate": 3.6281179138322e-06,
"loss": 0.0002,
"step": 411
},
{
"epoch": 4.204081632653061,
"grad_norm": 0.0021538427099585533,
"learning_rate": 3.5827664399092976e-06,
"loss": 0.0001,
"step": 412
},
{
"epoch": 4.214285714285714,
"grad_norm": 0.0021221789065748453,
"learning_rate": 3.537414965986395e-06,
"loss": 0.0001,
"step": 413
},
{
"epoch": 4.224489795918367,
"grad_norm": 0.0021122246980667114,
"learning_rate": 3.492063492063492e-06,
"loss": 0.0001,
"step": 414
},
{
"epoch": 4.23469387755102,
"grad_norm": 0.0025512792635709047,
"learning_rate": 3.44671201814059e-06,
"loss": 0.0001,
"step": 415
},
{
"epoch": 4.244897959183674,
"grad_norm": 0.0041538686491549015,
"learning_rate": 3.4013605442176872e-06,
"loss": 0.0003,
"step": 416
},
{
"epoch": 4.255102040816326,
"grad_norm": 0.0017510091420263052,
"learning_rate": 3.356009070294785e-06,
"loss": 0.0001,
"step": 417
},
{
"epoch": 4.26530612244898,
"grad_norm": 0.0024814323987811804,
"learning_rate": 3.3106575963718824e-06,
"loss": 0.0002,
"step": 418
},
{
"epoch": 4.275510204081632,
"grad_norm": 0.00235186074860394,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.0001,
"step": 419
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.0030012091156095266,
"learning_rate": 3.2199546485260772e-06,
"loss": 0.0002,
"step": 420
},
{
"epoch": 4.295918367346939,
"grad_norm": 0.004921985324472189,
"learning_rate": 3.1746031746031746e-06,
"loss": 0.0003,
"step": 421
},
{
"epoch": 4.3061224489795915,
"grad_norm": 0.009844960644841194,
"learning_rate": 3.1292517006802725e-06,
"loss": 0.0004,
"step": 422
},
{
"epoch": 4.316326530612245,
"grad_norm": 0.003105542156845331,
"learning_rate": 3.08390022675737e-06,
"loss": 0.0002,
"step": 423
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.005888419691473246,
"learning_rate": 3.0385487528344677e-06,
"loss": 0.0003,
"step": 424
},
{
"epoch": 4.336734693877551,
"grad_norm": 0.002076453994959593,
"learning_rate": 2.993197278911565e-06,
"loss": 0.0001,
"step": 425
},
{
"epoch": 4.346938775510204,
"grad_norm": 0.0016607132274657488,
"learning_rate": 2.947845804988662e-06,
"loss": 0.0001,
"step": 426
},
{
"epoch": 4.357142857142857,
"grad_norm": 0.0024275570176541805,
"learning_rate": 2.90249433106576e-06,
"loss": 0.0002,
"step": 427
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.003902298165485263,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.0002,
"step": 428
},
{
"epoch": 4.377551020408164,
"grad_norm": 0.0023378883488476276,
"learning_rate": 2.811791383219955e-06,
"loss": 0.0002,
"step": 429
},
{
"epoch": 4.387755102040816,
"grad_norm": 0.0051103937439620495,
"learning_rate": 2.7664399092970525e-06,
"loss": 0.0002,
"step": 430
},
{
"epoch": 4.3979591836734695,
"grad_norm": 0.0026863350067287683,
"learning_rate": 2.7210884353741503e-06,
"loss": 0.0002,
"step": 431
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.002001287881284952,
"learning_rate": 2.6757369614512473e-06,
"loss": 0.0001,
"step": 432
},
{
"epoch": 4.418367346938775,
"grad_norm": 0.008789247833192348,
"learning_rate": 2.6303854875283447e-06,
"loss": 0.0003,
"step": 433
},
{
"epoch": 4.428571428571429,
"grad_norm": 0.004610543139278889,
"learning_rate": 2.5850340136054425e-06,
"loss": 0.0002,
"step": 434
},
{
"epoch": 4.438775510204081,
"grad_norm": 0.00599480327218771,
"learning_rate": 2.53968253968254e-06,
"loss": 0.0003,
"step": 435
},
{
"epoch": 4.448979591836735,
"grad_norm": 0.00554778054356575,
"learning_rate": 2.4943310657596373e-06,
"loss": 0.0003,
"step": 436
},
{
"epoch": 4.459183673469388,
"grad_norm": 0.00669802725315094,
"learning_rate": 2.4489795918367347e-06,
"loss": 0.0003,
"step": 437
},
{
"epoch": 4.469387755102041,
"grad_norm": 0.0016791113885119557,
"learning_rate": 2.4036281179138325e-06,
"loss": 0.0001,
"step": 438
},
{
"epoch": 4.479591836734694,
"grad_norm": 0.0024550866801291704,
"learning_rate": 2.35827664399093e-06,
"loss": 0.0002,
"step": 439
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.0024079831782728434,
"learning_rate": 2.3129251700680273e-06,
"loss": 0.0002,
"step": 440
},
{
"epoch": 4.5,
"grad_norm": 0.0028663375414907932,
"learning_rate": 2.267573696145125e-06,
"loss": 0.0002,
"step": 441
},
{
"epoch": 4.510204081632653,
"grad_norm": 0.008847690187394619,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0003,
"step": 442
},
{
"epoch": 4.520408163265306,
"grad_norm": 0.005266358610242605,
"learning_rate": 2.17687074829932e-06,
"loss": 0.0003,
"step": 443
},
{
"epoch": 4.530612244897959,
"grad_norm": 0.0024195676669478416,
"learning_rate": 2.1315192743764173e-06,
"loss": 0.0002,
"step": 444
},
{
"epoch": 4.540816326530612,
"grad_norm": 0.00421124929562211,
"learning_rate": 2.086167800453515e-06,
"loss": 0.0002,
"step": 445
},
{
"epoch": 4.551020408163265,
"grad_norm": 0.0020824300590902567,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.0001,
"step": 446
},
{
"epoch": 4.561224489795919,
"grad_norm": 0.005051845218986273,
"learning_rate": 1.99546485260771e-06,
"loss": 0.0002,
"step": 447
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.0022977020125836134,
"learning_rate": 1.9501133786848077e-06,
"loss": 0.0002,
"step": 448
},
{
"epoch": 4.581632653061225,
"grad_norm": 0.001990046352148056,
"learning_rate": 1.904761904761905e-06,
"loss": 0.0001,
"step": 449
},
{
"epoch": 4.591836734693878,
"grad_norm": 0.0031708430033177137,
"learning_rate": 1.8594104308390023e-06,
"loss": 0.0002,
"step": 450
},
{
"epoch": 4.6020408163265305,
"grad_norm": 0.0034788285847753286,
"learning_rate": 1.8140589569161e-06,
"loss": 0.0002,
"step": 451
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.0018601809861138463,
"learning_rate": 1.7687074829931975e-06,
"loss": 0.0001,
"step": 452
},
{
"epoch": 4.622448979591836,
"grad_norm": 0.016590220853686333,
"learning_rate": 1.723356009070295e-06,
"loss": 0.0003,
"step": 453
},
{
"epoch": 4.63265306122449,
"grad_norm": 0.003050972009077668,
"learning_rate": 1.6780045351473925e-06,
"loss": 0.0002,
"step": 454
},
{
"epoch": 4.642857142857143,
"grad_norm": 0.002196480752900243,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.0001,
"step": 455
},
{
"epoch": 4.653061224489796,
"grad_norm": 0.0025891121476888657,
"learning_rate": 1.5873015873015873e-06,
"loss": 0.0001,
"step": 456
},
{
"epoch": 4.663265306122449,
"grad_norm": 0.002245939103886485,
"learning_rate": 1.541950113378685e-06,
"loss": 0.0001,
"step": 457
},
{
"epoch": 4.673469387755102,
"grad_norm": 0.0021706093102693558,
"learning_rate": 1.4965986394557825e-06,
"loss": 0.0001,
"step": 458
},
{
"epoch": 4.683673469387755,
"grad_norm": 0.0034395295660942793,
"learning_rate": 1.45124716553288e-06,
"loss": 0.0002,
"step": 459
},
{
"epoch": 4.6938775510204085,
"grad_norm": 0.0023007583804428577,
"learning_rate": 1.4058956916099775e-06,
"loss": 0.0001,
"step": 460
},
{
"epoch": 4.704081632653061,
"grad_norm": 0.004494468215852976,
"learning_rate": 1.3605442176870751e-06,
"loss": 0.0002,
"step": 461
},
{
"epoch": 4.714285714285714,
"grad_norm": 0.0017052018083631992,
"learning_rate": 1.3151927437641723e-06,
"loss": 0.0001,
"step": 462
},
{
"epoch": 4.724489795918368,
"grad_norm": 0.0016002283664420247,
"learning_rate": 1.26984126984127e-06,
"loss": 0.0001,
"step": 463
},
{
"epoch": 4.73469387755102,
"grad_norm": 0.0033771556336432695,
"learning_rate": 1.2244897959183673e-06,
"loss": 0.0002,
"step": 464
},
{
"epoch": 4.744897959183674,
"grad_norm": 0.002580232685431838,
"learning_rate": 1.179138321995465e-06,
"loss": 0.0002,
"step": 465
},
{
"epoch": 4.755102040816326,
"grad_norm": 0.0020106956362724304,
"learning_rate": 1.1337868480725626e-06,
"loss": 0.0001,
"step": 466
},
{
"epoch": 4.76530612244898,
"grad_norm": 0.0036402051337063313,
"learning_rate": 1.08843537414966e-06,
"loss": 0.0002,
"step": 467
},
{
"epoch": 4.775510204081632,
"grad_norm": 0.004601712804287672,
"learning_rate": 1.0430839002267576e-06,
"loss": 0.0002,
"step": 468
},
{
"epoch": 4.785714285714286,
"grad_norm": 0.002733904868364334,
"learning_rate": 9.97732426303855e-07,
"loss": 0.0002,
"step": 469
},
{
"epoch": 4.795918367346939,
"grad_norm": 0.0027253010775893927,
"learning_rate": 9.523809523809525e-07,
"loss": 0.0002,
"step": 470
},
{
"epoch": 4.8061224489795915,
"grad_norm": 0.02664267271757126,
"learning_rate": 9.0702947845805e-07,
"loss": 0.0007,
"step": 471
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.002349977381527424,
"learning_rate": 8.616780045351475e-07,
"loss": 0.0002,
"step": 472
},
{
"epoch": 4.826530612244898,
"grad_norm": 0.01735026389360428,
"learning_rate": 8.163265306122449e-07,
"loss": 0.0006,
"step": 473
},
{
"epoch": 4.836734693877551,
"grad_norm": 0.003476122161373496,
"learning_rate": 7.709750566893425e-07,
"loss": 0.0002,
"step": 474
},
{
"epoch": 4.846938775510204,
"grad_norm": 0.0016622812254354358,
"learning_rate": 7.2562358276644e-07,
"loss": 0.0001,
"step": 475
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.0042258999310433865,
"learning_rate": 6.802721088435376e-07,
"loss": 0.0002,
"step": 476
},
{
"epoch": 4.86734693877551,
"grad_norm": 0.0023899853695183992,
"learning_rate": 6.34920634920635e-07,
"loss": 0.0001,
"step": 477
},
{
"epoch": 4.877551020408164,
"grad_norm": 0.0037495435681194067,
"learning_rate": 5.895691609977325e-07,
"loss": 0.0002,
"step": 478
},
{
"epoch": 4.887755102040816,
"grad_norm": 0.00362688978202641,
"learning_rate": 5.4421768707483e-07,
"loss": 0.0002,
"step": 479
},
{
"epoch": 4.8979591836734695,
"grad_norm": 0.005572126246988773,
"learning_rate": 4.988662131519275e-07,
"loss": 0.0003,
"step": 480
},
{
"epoch": 4.908163265306122,
"grad_norm": 0.0033278209157288074,
"learning_rate": 4.53514739229025e-07,
"loss": 0.0002,
"step": 481
},
{
"epoch": 4.918367346938775,
"grad_norm": 0.0017971718916669488,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.0001,
"step": 482
},
{
"epoch": 4.928571428571429,
"grad_norm": 0.003246983280405402,
"learning_rate": 3.6281179138322e-07,
"loss": 0.0002,
"step": 483
},
{
"epoch": 4.938775510204081,
"grad_norm": 0.002570765558630228,
"learning_rate": 3.174603174603175e-07,
"loss": 0.0002,
"step": 484
},
{
"epoch": 4.948979591836735,
"grad_norm": 0.003006896935403347,
"learning_rate": 2.72108843537415e-07,
"loss": 0.0002,
"step": 485
},
{
"epoch": 4.959183673469388,
"grad_norm": 0.003843962447717786,
"learning_rate": 2.267573696145125e-07,
"loss": 0.0002,
"step": 486
},
{
"epoch": 4.969387755102041,
"grad_norm": 0.005520265083760023,
"learning_rate": 1.8140589569161e-07,
"loss": 0.0002,
"step": 487
},
{
"epoch": 4.979591836734694,
"grad_norm": 0.0035644182935357094,
"learning_rate": 1.360544217687075e-07,
"loss": 0.0002,
"step": 488
},
{
"epoch": 4.989795918367347,
"grad_norm": 0.009897944517433643,
"learning_rate": 9.0702947845805e-08,
"loss": 0.0004,
"step": 489
},
{
"epoch": 5.0,
"grad_norm": 0.005771194584667683,
"learning_rate": 4.53514739229025e-08,
"loss": 0.0002,
"step": 490
}
],
"logging_steps": 1,
"max_steps": 490,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}