9b-135 / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
5fa1c56 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3564,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0016835016835016834,
"grad_norm": 9.827384948730469,
"learning_rate": 1.1173184357541899e-08,
"loss": 1.7055253982543945,
"step": 2
},
{
"epoch": 0.003367003367003367,
"grad_norm": 9.42782211303711,
"learning_rate": 3.3519553072625695e-08,
"loss": 1.2431578636169434,
"step": 4
},
{
"epoch": 0.005050505050505051,
"grad_norm": 9.667145729064941,
"learning_rate": 5.586592178770949e-08,
"loss": 1.6887383460998535,
"step": 6
},
{
"epoch": 0.006734006734006734,
"grad_norm": 11.942709922790527,
"learning_rate": 7.82122905027933e-08,
"loss": 1.6064767837524414,
"step": 8
},
{
"epoch": 0.008417508417508417,
"grad_norm": 131.6094207763672,
"learning_rate": 1.005586592178771e-07,
"loss": 4.499759674072266,
"step": 10
},
{
"epoch": 0.010101010101010102,
"grad_norm": 6.955765724182129,
"learning_rate": 1.2290502793296089e-07,
"loss": 1.9788310527801514,
"step": 12
},
{
"epoch": 0.011784511784511785,
"grad_norm": 4.201331615447998,
"learning_rate": 1.452513966480447e-07,
"loss": 1.6753560304641724,
"step": 14
},
{
"epoch": 0.013468013468013467,
"grad_norm": 17.091062545776367,
"learning_rate": 1.6759776536312846e-07,
"loss": 1.6581202745437622,
"step": 16
},
{
"epoch": 0.015151515151515152,
"grad_norm": 22.55893325805664,
"learning_rate": 1.8994413407821228e-07,
"loss": 2.7158942222595215,
"step": 18
},
{
"epoch": 0.016835016835016835,
"grad_norm": 6.976036548614502,
"learning_rate": 2.122905027932961e-07,
"loss": 1.9487460851669312,
"step": 20
},
{
"epoch": 0.018518518518518517,
"grad_norm": 4.87603759765625,
"learning_rate": 2.3463687150837988e-07,
"loss": 1.845729947090149,
"step": 22
},
{
"epoch": 0.020202020202020204,
"grad_norm": 13.902255058288574,
"learning_rate": 2.5698324022346367e-07,
"loss": 3.498323917388916,
"step": 24
},
{
"epoch": 0.021885521885521887,
"grad_norm": 14.09145450592041,
"learning_rate": 2.7932960893854745e-07,
"loss": 2.7927517890930176,
"step": 26
},
{
"epoch": 0.02356902356902357,
"grad_norm": 12.507741928100586,
"learning_rate": 3.016759776536313e-07,
"loss": 2.1394832134246826,
"step": 28
},
{
"epoch": 0.025252525252525252,
"grad_norm": 50.04438018798828,
"learning_rate": 3.240223463687151e-07,
"loss": 3.230577230453491,
"step": 30
},
{
"epoch": 0.026936026936026935,
"grad_norm": 22.915058135986328,
"learning_rate": 3.4636871508379887e-07,
"loss": 1.7826504707336426,
"step": 32
},
{
"epoch": 0.02861952861952862,
"grad_norm": 34.94866943359375,
"learning_rate": 3.6871508379888266e-07,
"loss": 3.590939998626709,
"step": 34
},
{
"epoch": 0.030303030303030304,
"grad_norm": 9.724323272705078,
"learning_rate": 3.9106145251396645e-07,
"loss": 1.9341622591018677,
"step": 36
},
{
"epoch": 0.03198653198653199,
"grad_norm": 16.15651512145996,
"learning_rate": 4.134078212290503e-07,
"loss": 1.4625201225280762,
"step": 38
},
{
"epoch": 0.03367003367003367,
"grad_norm": 7.4519453048706055,
"learning_rate": 4.35754189944134e-07,
"loss": 2.242250919342041,
"step": 40
},
{
"epoch": 0.03535353535353535,
"grad_norm": 6.571437835693359,
"learning_rate": 4.5810055865921786e-07,
"loss": 2.679516315460205,
"step": 42
},
{
"epoch": 0.037037037037037035,
"grad_norm": 19.185373306274414,
"learning_rate": 4.804469273743016e-07,
"loss": 2.1858067512512207,
"step": 44
},
{
"epoch": 0.03872053872053872,
"grad_norm": 18.07056999206543,
"learning_rate": 5.027932960893855e-07,
"loss": 1.433751106262207,
"step": 46
},
{
"epoch": 0.04040404040404041,
"grad_norm": 24.015710830688477,
"learning_rate": 5.251396648044693e-07,
"loss": 2.102412700653076,
"step": 48
},
{
"epoch": 0.04208754208754209,
"grad_norm": 22.281003952026367,
"learning_rate": 5.474860335195531e-07,
"loss": 1.8496794700622559,
"step": 50
},
{
"epoch": 0.04377104377104377,
"grad_norm": 16.242393493652344,
"learning_rate": 5.698324022346367e-07,
"loss": 1.9199731349945068,
"step": 52
},
{
"epoch": 0.045454545454545456,
"grad_norm": 11.205278396606445,
"learning_rate": 5.921787709497206e-07,
"loss": 1.8013508319854736,
"step": 54
},
{
"epoch": 0.04713804713804714,
"grad_norm": 4.4281840324401855,
"learning_rate": 6.145251396648044e-07,
"loss": 1.5387322902679443,
"step": 56
},
{
"epoch": 0.04882154882154882,
"grad_norm": 27.68507194519043,
"learning_rate": 6.368715083798882e-07,
"loss": 1.7617017030715942,
"step": 58
},
{
"epoch": 0.050505050505050504,
"grad_norm": 13.444940567016602,
"learning_rate": 6.59217877094972e-07,
"loss": 1.5345146656036377,
"step": 60
},
{
"epoch": 0.05218855218855219,
"grad_norm": 12.37048625946045,
"learning_rate": 6.815642458100558e-07,
"loss": 1.5472785234451294,
"step": 62
},
{
"epoch": 0.05387205387205387,
"grad_norm": 5.660282135009766,
"learning_rate": 7.039106145251397e-07,
"loss": 1.3724396228790283,
"step": 64
},
{
"epoch": 0.05555555555555555,
"grad_norm": 32.10633087158203,
"learning_rate": 7.262569832402235e-07,
"loss": 1.7364461421966553,
"step": 66
},
{
"epoch": 0.05723905723905724,
"grad_norm": 15.033787727355957,
"learning_rate": 7.486033519553073e-07,
"loss": 1.5618245601654053,
"step": 68
},
{
"epoch": 0.058922558922558925,
"grad_norm": 5.500316143035889,
"learning_rate": 7.709497206703909e-07,
"loss": 1.4692459106445312,
"step": 70
},
{
"epoch": 0.06060606060606061,
"grad_norm": 7.862852096557617,
"learning_rate": 7.932960893854748e-07,
"loss": 1.767068862915039,
"step": 72
},
{
"epoch": 0.06228956228956229,
"grad_norm": 3.3375768661499023,
"learning_rate": 8.156424581005586e-07,
"loss": 1.5882585048675537,
"step": 74
},
{
"epoch": 0.06397306397306397,
"grad_norm": 4.3638529777526855,
"learning_rate": 8.379888268156424e-07,
"loss": 1.0791618824005127,
"step": 76
},
{
"epoch": 0.06565656565656566,
"grad_norm": 3.2826614379882812,
"learning_rate": 8.603351955307262e-07,
"loss": 1.623827338218689,
"step": 78
},
{
"epoch": 0.06734006734006734,
"grad_norm": 13.223998069763184,
"learning_rate": 8.8268156424581e-07,
"loss": 1.4189568758010864,
"step": 80
},
{
"epoch": 0.06902356902356903,
"grad_norm": 8.176948547363281,
"learning_rate": 9.050279329608939e-07,
"loss": 1.5663306713104248,
"step": 82
},
{
"epoch": 0.0707070707070707,
"grad_norm": 8.477921485900879,
"learning_rate": 9.273743016759777e-07,
"loss": 1.3473039865493774,
"step": 84
},
{
"epoch": 0.0723905723905724,
"grad_norm": 5.039812088012695,
"learning_rate": 9.497206703910615e-07,
"loss": 1.4909709692001343,
"step": 86
},
{
"epoch": 0.07407407407407407,
"grad_norm": 4.436509132385254,
"learning_rate": 9.720670391061452e-07,
"loss": 1.3051445484161377,
"step": 88
},
{
"epoch": 0.07575757575757576,
"grad_norm": 10.7329740524292,
"learning_rate": 9.94413407821229e-07,
"loss": 1.4471063613891602,
"step": 90
},
{
"epoch": 0.07744107744107744,
"grad_norm": 48.17202377319336,
"learning_rate": 1.0167597765363128e-06,
"loss": 1.1504158973693848,
"step": 92
},
{
"epoch": 0.07912457912457913,
"grad_norm": 9.630391120910645,
"learning_rate": 1.0391061452513965e-06,
"loss": 1.238828182220459,
"step": 94
},
{
"epoch": 0.08080808080808081,
"grad_norm": 3.6707308292388916,
"learning_rate": 1.0614525139664804e-06,
"loss": 1.29024076461792,
"step": 96
},
{
"epoch": 0.08249158249158249,
"grad_norm": 20.06619644165039,
"learning_rate": 1.0837988826815643e-06,
"loss": 1.2375919818878174,
"step": 98
},
{
"epoch": 0.08417508417508418,
"grad_norm": 6.117098331451416,
"learning_rate": 1.106145251396648e-06,
"loss": 1.2162528038024902,
"step": 100
},
{
"epoch": 0.08585858585858586,
"grad_norm": 7.965595245361328,
"learning_rate": 1.1284916201117319e-06,
"loss": 1.0878969430923462,
"step": 102
},
{
"epoch": 0.08754208754208755,
"grad_norm": 3.471269369125366,
"learning_rate": 1.1508379888268155e-06,
"loss": 0.8488566875457764,
"step": 104
},
{
"epoch": 0.08922558922558922,
"grad_norm": 19.03371238708496,
"learning_rate": 1.1731843575418994e-06,
"loss": 0.9605998992919922,
"step": 106
},
{
"epoch": 0.09090909090909091,
"grad_norm": 4.8145551681518555,
"learning_rate": 1.1955307262569831e-06,
"loss": 1.2580342292785645,
"step": 108
},
{
"epoch": 0.09259259259259259,
"grad_norm": 12.215010643005371,
"learning_rate": 1.217877094972067e-06,
"loss": 0.8208008408546448,
"step": 110
},
{
"epoch": 0.09427609427609428,
"grad_norm": 5.212827682495117,
"learning_rate": 1.2402234636871507e-06,
"loss": 1.2487308979034424,
"step": 112
},
{
"epoch": 0.09595959595959595,
"grad_norm": 110.1784439086914,
"learning_rate": 1.2625698324022344e-06,
"loss": 1.0615664720535278,
"step": 114
},
{
"epoch": 0.09764309764309764,
"grad_norm": 8.633198738098145,
"learning_rate": 1.2849162011173185e-06,
"loss": 0.7479297518730164,
"step": 116
},
{
"epoch": 0.09932659932659933,
"grad_norm": 3.4412970542907715,
"learning_rate": 1.3072625698324022e-06,
"loss": 1.1516764163970947,
"step": 118
},
{
"epoch": 0.10101010101010101,
"grad_norm": 2.8980441093444824,
"learning_rate": 1.329608938547486e-06,
"loss": 1.0023488998413086,
"step": 120
},
{
"epoch": 0.1026936026936027,
"grad_norm": 4.491576671600342,
"learning_rate": 1.3519553072625697e-06,
"loss": 1.207779884338379,
"step": 122
},
{
"epoch": 0.10437710437710437,
"grad_norm": 5.334079742431641,
"learning_rate": 1.3743016759776536e-06,
"loss": 0.8073678612709045,
"step": 124
},
{
"epoch": 0.10606060606060606,
"grad_norm": 5.402129650115967,
"learning_rate": 1.3966480446927373e-06,
"loss": 0.7180484533309937,
"step": 126
},
{
"epoch": 0.10774410774410774,
"grad_norm": 33.15776824951172,
"learning_rate": 1.4189944134078212e-06,
"loss": 1.076992392539978,
"step": 128
},
{
"epoch": 0.10942760942760943,
"grad_norm": 12.190916061401367,
"learning_rate": 1.441340782122905e-06,
"loss": 0.9793660640716553,
"step": 130
},
{
"epoch": 0.1111111111111111,
"grad_norm": 5.5417070388793945,
"learning_rate": 1.4636871508379886e-06,
"loss": 0.9299952387809753,
"step": 132
},
{
"epoch": 0.1127946127946128,
"grad_norm": 3.002917766571045,
"learning_rate": 1.4860335195530727e-06,
"loss": 1.1973538398742676,
"step": 134
},
{
"epoch": 0.11447811447811448,
"grad_norm": 13.795450210571289,
"learning_rate": 1.5083798882681564e-06,
"loss": 1.1933711767196655,
"step": 136
},
{
"epoch": 0.11616161616161616,
"grad_norm": 3.4793336391448975,
"learning_rate": 1.5307262569832403e-06,
"loss": 1.5386559963226318,
"step": 138
},
{
"epoch": 0.11784511784511785,
"grad_norm": 9.980926513671875,
"learning_rate": 1.553072625698324e-06,
"loss": 1.125044584274292,
"step": 140
},
{
"epoch": 0.11952861952861953,
"grad_norm": 4.957187175750732,
"learning_rate": 1.5754189944134078e-06,
"loss": 1.0593317747116089,
"step": 142
},
{
"epoch": 0.12121212121212122,
"grad_norm": 14.749825477600098,
"learning_rate": 1.5977653631284915e-06,
"loss": 0.9547094702720642,
"step": 144
},
{
"epoch": 0.12289562289562289,
"grad_norm": 3.5250778198242188,
"learning_rate": 1.6201117318435752e-06,
"loss": 1.1345624923706055,
"step": 146
},
{
"epoch": 0.12457912457912458,
"grad_norm": 3.4003188610076904,
"learning_rate": 1.642458100558659e-06,
"loss": 0.9924101829528809,
"step": 148
},
{
"epoch": 0.12626262626262627,
"grad_norm": 18.434391021728516,
"learning_rate": 1.6648044692737428e-06,
"loss": 1.2128210067749023,
"step": 150
},
{
"epoch": 0.12794612794612795,
"grad_norm": 6.9610066413879395,
"learning_rate": 1.6871508379888269e-06,
"loss": 0.9494305849075317,
"step": 152
},
{
"epoch": 0.12962962962962962,
"grad_norm": 42.241188049316406,
"learning_rate": 1.7094972067039106e-06,
"loss": 1.1769180297851562,
"step": 154
},
{
"epoch": 0.13131313131313133,
"grad_norm": 19.53082275390625,
"learning_rate": 1.7318435754189945e-06,
"loss": 1.0955569744110107,
"step": 156
},
{
"epoch": 0.132996632996633,
"grad_norm": 4.005194187164307,
"learning_rate": 1.7541899441340781e-06,
"loss": 1.0531185865402222,
"step": 158
},
{
"epoch": 0.13468013468013468,
"grad_norm": 5.709774494171143,
"learning_rate": 1.776536312849162e-06,
"loss": 1.1533485651016235,
"step": 160
},
{
"epoch": 0.13636363636363635,
"grad_norm": 2.3597922325134277,
"learning_rate": 1.7988826815642457e-06,
"loss": 1.0321946144104004,
"step": 162
},
{
"epoch": 0.13804713804713806,
"grad_norm": 16.570262908935547,
"learning_rate": 1.8212290502793294e-06,
"loss": 0.9637615084648132,
"step": 164
},
{
"epoch": 0.13973063973063973,
"grad_norm": 8.452648162841797,
"learning_rate": 1.8435754189944133e-06,
"loss": 0.9408825039863586,
"step": 166
},
{
"epoch": 0.1414141414141414,
"grad_norm": 2.8005619049072266,
"learning_rate": 1.865921787709497e-06,
"loss": 1.127833366394043,
"step": 168
},
{
"epoch": 0.14309764309764308,
"grad_norm": 6.316201686859131,
"learning_rate": 1.8882681564245809e-06,
"loss": 1.0138617753982544,
"step": 170
},
{
"epoch": 0.1447811447811448,
"grad_norm": 14.958882331848145,
"learning_rate": 1.9106145251396648e-06,
"loss": 1.0158287286758423,
"step": 172
},
{
"epoch": 0.14646464646464646,
"grad_norm": 4.5443267822265625,
"learning_rate": 1.9329608938547484e-06,
"loss": 0.7117235064506531,
"step": 174
},
{
"epoch": 0.14814814814814814,
"grad_norm": 4.039905548095703,
"learning_rate": 1.9553072625698325e-06,
"loss": 1.0871771574020386,
"step": 176
},
{
"epoch": 0.14983164983164984,
"grad_norm": 3.271326780319214,
"learning_rate": 1.9776536312849162e-06,
"loss": 1.267643690109253,
"step": 178
},
{
"epoch": 0.15151515151515152,
"grad_norm": 5.037292957305908,
"learning_rate": 2e-06,
"loss": 1.0257434844970703,
"step": 180
},
{
"epoch": 0.1531986531986532,
"grad_norm": 4.92929220199585,
"learning_rate": 1.9999984495606584e-06,
"loss": 1.4013102054595947,
"step": 182
},
{
"epoch": 0.15488215488215487,
"grad_norm": 23.51206398010254,
"learning_rate": 1.999993798247977e-06,
"loss": 1.0038059949874878,
"step": 184
},
{
"epoch": 0.15656565656565657,
"grad_norm": 14.101850509643555,
"learning_rate": 1.99998604607798e-06,
"loss": 1.1263923645019531,
"step": 186
},
{
"epoch": 0.15824915824915825,
"grad_norm": 11.950604438781738,
"learning_rate": 1.9999751930773778e-06,
"loss": 0.9272401332855225,
"step": 188
},
{
"epoch": 0.15993265993265993,
"grad_norm": 21.03433609008789,
"learning_rate": 1.999961239283563e-06,
"loss": 0.7770416140556335,
"step": 190
},
{
"epoch": 0.16161616161616163,
"grad_norm": 3.4966766834259033,
"learning_rate": 1.999944184744613e-06,
"loss": 1.348158597946167,
"step": 192
},
{
"epoch": 0.1632996632996633,
"grad_norm": 3.6538894176483154,
"learning_rate": 1.999924029519287e-06,
"loss": 1.2516090869903564,
"step": 194
},
{
"epoch": 0.16498316498316498,
"grad_norm": 4.83535623550415,
"learning_rate": 1.9999007736770295e-06,
"loss": 1.072089672088623,
"step": 196
},
{
"epoch": 0.16666666666666666,
"grad_norm": 3.3021559715270996,
"learning_rate": 1.9998744172979654e-06,
"loss": 1.1623098850250244,
"step": 198
},
{
"epoch": 0.16835016835016836,
"grad_norm": 14.60655689239502,
"learning_rate": 1.9998449604729044e-06,
"loss": 0.8636209964752197,
"step": 200
},
{
"epoch": 0.17003367003367004,
"grad_norm": 12.559534072875977,
"learning_rate": 1.9998124033033366e-06,
"loss": 0.895442008972168,
"step": 202
},
{
"epoch": 0.1717171717171717,
"grad_norm": 4.964874744415283,
"learning_rate": 1.9997767459014363e-06,
"loss": 1.0330384969711304,
"step": 204
},
{
"epoch": 0.1734006734006734,
"grad_norm": 3.3170907497406006,
"learning_rate": 1.9997379883900572e-06,
"loss": 0.9942055940628052,
"step": 206
},
{
"epoch": 0.1750841750841751,
"grad_norm": 4.744529724121094,
"learning_rate": 1.999696130902736e-06,
"loss": 1.2099803686141968,
"step": 208
},
{
"epoch": 0.17676767676767677,
"grad_norm": 11.850593566894531,
"learning_rate": 1.9996511735836895e-06,
"loss": 0.7535406351089478,
"step": 210
},
{
"epoch": 0.17845117845117844,
"grad_norm": 16.69972038269043,
"learning_rate": 1.999603116587814e-06,
"loss": 0.9160436987876892,
"step": 212
},
{
"epoch": 0.18013468013468015,
"grad_norm": 2.5802817344665527,
"learning_rate": 1.9995519600806863e-06,
"loss": 1.3276009559631348,
"step": 214
},
{
"epoch": 0.18181818181818182,
"grad_norm": 9.903021812438965,
"learning_rate": 1.999497704238562e-06,
"loss": 0.8258368372917175,
"step": 216
},
{
"epoch": 0.1835016835016835,
"grad_norm": 10.159919738769531,
"learning_rate": 1.9994403492483755e-06,
"loss": 0.6640470027923584,
"step": 218
},
{
"epoch": 0.18518518518518517,
"grad_norm": 3.8735828399658203,
"learning_rate": 1.999379895307739e-06,
"loss": 1.3416516780853271,
"step": 220
},
{
"epoch": 0.18686868686868688,
"grad_norm": 3.4755043983459473,
"learning_rate": 1.999316342624941e-06,
"loss": 0.9075236320495605,
"step": 222
},
{
"epoch": 0.18855218855218855,
"grad_norm": 5.18587064743042,
"learning_rate": 1.999249691418948e-06,
"loss": 1.193176507949829,
"step": 224
},
{
"epoch": 0.19023569023569023,
"grad_norm": 6.766015529632568,
"learning_rate": 1.999179941919401e-06,
"loss": 0.9458363056182861,
"step": 226
},
{
"epoch": 0.1919191919191919,
"grad_norm": 12.469842910766602,
"learning_rate": 1.999107094366617e-06,
"loss": 1.1906776428222656,
"step": 228
},
{
"epoch": 0.1936026936026936,
"grad_norm": 15.036520004272461,
"learning_rate": 1.9990311490115858e-06,
"loss": 1.3650178909301758,
"step": 230
},
{
"epoch": 0.19528619528619529,
"grad_norm": 5.799370288848877,
"learning_rate": 1.9989521061159715e-06,
"loss": 1.0698531866073608,
"step": 232
},
{
"epoch": 0.19696969696969696,
"grad_norm": 5.714483737945557,
"learning_rate": 1.9988699659521098e-06,
"loss": 1.1641753911972046,
"step": 234
},
{
"epoch": 0.19865319865319866,
"grad_norm": 10.119220733642578,
"learning_rate": 1.9987847288030083e-06,
"loss": 0.9833089113235474,
"step": 236
},
{
"epoch": 0.20033670033670034,
"grad_norm": 3.4788730144500732,
"learning_rate": 1.998696394962345e-06,
"loss": 1.1086716651916504,
"step": 238
},
{
"epoch": 0.20202020202020202,
"grad_norm": 3.9894561767578125,
"learning_rate": 1.998604964734467e-06,
"loss": 0.9258865118026733,
"step": 240
},
{
"epoch": 0.2037037037037037,
"grad_norm": 4.706192970275879,
"learning_rate": 1.99851043843439e-06,
"loss": 1.1667051315307617,
"step": 242
},
{
"epoch": 0.2053872053872054,
"grad_norm": 15.748969078063965,
"learning_rate": 1.9984128163877964e-06,
"loss": 0.9964404106140137,
"step": 244
},
{
"epoch": 0.20707070707070707,
"grad_norm": 9.65405559539795,
"learning_rate": 1.998312098931036e-06,
"loss": 0.6644821166992188,
"step": 246
},
{
"epoch": 0.20875420875420875,
"grad_norm": 13.462628364562988,
"learning_rate": 1.998208286411122e-06,
"loss": 1.2101833820343018,
"step": 248
},
{
"epoch": 0.21043771043771045,
"grad_norm": 2.0463879108428955,
"learning_rate": 1.9981013791857327e-06,
"loss": 0.9958995580673218,
"step": 250
},
{
"epoch": 0.21212121212121213,
"grad_norm": 3.3968567848205566,
"learning_rate": 1.997991377623209e-06,
"loss": 0.8969879150390625,
"step": 252
},
{
"epoch": 0.2138047138047138,
"grad_norm": 17.595094680786133,
"learning_rate": 1.9978782821025513e-06,
"loss": 1.0462696552276611,
"step": 254
},
{
"epoch": 0.21548821548821548,
"grad_norm": 13.578154563903809,
"learning_rate": 1.9977620930134223e-06,
"loss": 1.1988019943237305,
"step": 256
},
{
"epoch": 0.21717171717171718,
"grad_norm": 4.280734062194824,
"learning_rate": 1.9976428107561415e-06,
"loss": 0.8459457755088806,
"step": 258
},
{
"epoch": 0.21885521885521886,
"grad_norm": 2.570441246032715,
"learning_rate": 1.997520435741687e-06,
"loss": 1.0279544591903687,
"step": 260
},
{
"epoch": 0.22053872053872053,
"grad_norm": 6.806192398071289,
"learning_rate": 1.9973949683916927e-06,
"loss": 1.0510814189910889,
"step": 262
},
{
"epoch": 0.2222222222222222,
"grad_norm": 4.318380832672119,
"learning_rate": 1.9972664091384454e-06,
"loss": 1.1062796115875244,
"step": 264
},
{
"epoch": 0.2239057239057239,
"grad_norm": 3.807039976119995,
"learning_rate": 1.997134758424886e-06,
"loss": 1.1960452795028687,
"step": 266
},
{
"epoch": 0.2255892255892256,
"grad_norm": 6.313713550567627,
"learning_rate": 1.9970000167046075e-06,
"loss": 0.6546218991279602,
"step": 268
},
{
"epoch": 0.22727272727272727,
"grad_norm": 3.2756094932556152,
"learning_rate": 1.996862184441851e-06,
"loss": 0.9819681644439697,
"step": 270
},
{
"epoch": 0.22895622895622897,
"grad_norm": 13.153508186340332,
"learning_rate": 1.9967212621115065e-06,
"loss": 1.3135335445404053,
"step": 272
},
{
"epoch": 0.23063973063973064,
"grad_norm": 14.49177074432373,
"learning_rate": 1.996577250199111e-06,
"loss": 1.1486749649047852,
"step": 274
},
{
"epoch": 0.23232323232323232,
"grad_norm": 26.132858276367188,
"learning_rate": 1.9964301492008464e-06,
"loss": 0.9009004831314087,
"step": 276
},
{
"epoch": 0.234006734006734,
"grad_norm": 3.963716506958008,
"learning_rate": 1.996279959623537e-06,
"loss": 1.1650899648666382,
"step": 278
},
{
"epoch": 0.2356902356902357,
"grad_norm": 13.785598754882812,
"learning_rate": 1.9961266819846495e-06,
"loss": 0.9621269702911377,
"step": 280
},
{
"epoch": 0.23737373737373738,
"grad_norm": 6.935214042663574,
"learning_rate": 1.9959703168122897e-06,
"loss": 0.9427906274795532,
"step": 282
},
{
"epoch": 0.23905723905723905,
"grad_norm": 3.0722286701202393,
"learning_rate": 1.995810864645202e-06,
"loss": 1.2749511003494263,
"step": 284
},
{
"epoch": 0.24074074074074073,
"grad_norm": 4.774331092834473,
"learning_rate": 1.995648326032765e-06,
"loss": 0.9315462112426758,
"step": 286
},
{
"epoch": 0.24242424242424243,
"grad_norm": 4.373500823974609,
"learning_rate": 1.9954827015349937e-06,
"loss": 0.8452310562133789,
"step": 288
},
{
"epoch": 0.2441077441077441,
"grad_norm": 9.997944831848145,
"learning_rate": 1.9953139917225333e-06,
"loss": 1.1583993434906006,
"step": 290
},
{
"epoch": 0.24579124579124578,
"grad_norm": 9.785924911499023,
"learning_rate": 1.995142197176661e-06,
"loss": 0.6743492484092712,
"step": 292
},
{
"epoch": 0.2474747474747475,
"grad_norm": 9.52839183807373,
"learning_rate": 1.9949673184892803e-06,
"loss": 1.274944543838501,
"step": 294
},
{
"epoch": 0.24915824915824916,
"grad_norm": 13.619229316711426,
"learning_rate": 1.9947893562629227e-06,
"loss": 1.085368037223816,
"step": 296
},
{
"epoch": 0.25084175084175087,
"grad_norm": 6.220252513885498,
"learning_rate": 1.9946083111107425e-06,
"loss": 0.6333813667297363,
"step": 298
},
{
"epoch": 0.25252525252525254,
"grad_norm": 12.346251487731934,
"learning_rate": 1.9944241836565167e-06,
"loss": 0.7786128520965576,
"step": 300
},
{
"epoch": 0.2542087542087542,
"grad_norm": 10.734468460083008,
"learning_rate": 1.9942369745346417e-06,
"loss": 1.0820167064666748,
"step": 302
},
{
"epoch": 0.2558922558922559,
"grad_norm": 25.510744094848633,
"learning_rate": 1.9940466843901318e-06,
"loss": 0.9161986112594604,
"step": 304
},
{
"epoch": 0.25757575757575757,
"grad_norm": 5.673551559448242,
"learning_rate": 1.9938533138786163e-06,
"loss": 1.3526289463043213,
"step": 306
},
{
"epoch": 0.25925925925925924,
"grad_norm": 11.891182899475098,
"learning_rate": 1.9936568636663383e-06,
"loss": 1.1077102422714233,
"step": 308
},
{
"epoch": 0.2609427609427609,
"grad_norm": 7.852316856384277,
"learning_rate": 1.9934573344301514e-06,
"loss": 1.0809465646743774,
"step": 310
},
{
"epoch": 0.26262626262626265,
"grad_norm": 20.96988296508789,
"learning_rate": 1.993254726857518e-06,
"loss": 1.225387454032898,
"step": 312
},
{
"epoch": 0.26430976430976433,
"grad_norm": 5.888166427612305,
"learning_rate": 1.9930490416465057e-06,
"loss": 1.086962103843689,
"step": 314
},
{
"epoch": 0.265993265993266,
"grad_norm": 2.8382439613342285,
"learning_rate": 1.992840279505787e-06,
"loss": 1.225638508796692,
"step": 316
},
{
"epoch": 0.2676767676767677,
"grad_norm": 4.078027725219727,
"learning_rate": 1.9926284411546355e-06,
"loss": 0.99470055103302,
"step": 318
},
{
"epoch": 0.26936026936026936,
"grad_norm": 14.269658088684082,
"learning_rate": 1.9924135273229235e-06,
"loss": 0.727924108505249,
"step": 320
},
{
"epoch": 0.27104377104377103,
"grad_norm": 3.730602502822876,
"learning_rate": 1.9921955387511195e-06,
"loss": 0.9582691192626953,
"step": 322
},
{
"epoch": 0.2727272727272727,
"grad_norm": 3.153249979019165,
"learning_rate": 1.991974476190285e-06,
"loss": 1.263975977897644,
"step": 324
},
{
"epoch": 0.27441077441077444,
"grad_norm": 2.4196362495422363,
"learning_rate": 1.9917503404020747e-06,
"loss": 1.0396244525909424,
"step": 326
},
{
"epoch": 0.2760942760942761,
"grad_norm": 12.836146354675293,
"learning_rate": 1.9915231321587305e-06,
"loss": 0.8178722262382507,
"step": 328
},
{
"epoch": 0.2777777777777778,
"grad_norm": 5.543509483337402,
"learning_rate": 1.99129285224308e-06,
"loss": 0.9038114547729492,
"step": 330
},
{
"epoch": 0.27946127946127947,
"grad_norm": 5.564317226409912,
"learning_rate": 1.9910595014485347e-06,
"loss": 1.0971403121948242,
"step": 332
},
{
"epoch": 0.28114478114478114,
"grad_norm": 5.212599754333496,
"learning_rate": 1.990823080579086e-06,
"loss": 1.0671043395996094,
"step": 334
},
{
"epoch": 0.2828282828282828,
"grad_norm": 5.401691436767578,
"learning_rate": 1.990583590449303e-06,
"loss": 1.0057094097137451,
"step": 336
},
{
"epoch": 0.2845117845117845,
"grad_norm": 3.39033579826355,
"learning_rate": 1.990341031884331e-06,
"loss": 1.1939620971679688,
"step": 338
},
{
"epoch": 0.28619528619528617,
"grad_norm": 12.433296203613281,
"learning_rate": 1.9900954057198856e-06,
"loss": 0.9549685120582581,
"step": 340
},
{
"epoch": 0.2878787878787879,
"grad_norm": 23.119340896606445,
"learning_rate": 1.989846712802252e-06,
"loss": 1.1277296543121338,
"step": 342
},
{
"epoch": 0.2895622895622896,
"grad_norm": 42.77076721191406,
"learning_rate": 1.9895949539882827e-06,
"loss": 0.8779406547546387,
"step": 344
},
{
"epoch": 0.29124579124579125,
"grad_norm": 2.3723807334899902,
"learning_rate": 1.9893401301453926e-06,
"loss": 1.1096537113189697,
"step": 346
},
{
"epoch": 0.29292929292929293,
"grad_norm": 7.652088165283203,
"learning_rate": 1.989082242151556e-06,
"loss": 1.053053379058838,
"step": 348
},
{
"epoch": 0.2946127946127946,
"grad_norm": 8.224458694458008,
"learning_rate": 1.988821290895307e-06,
"loss": 0.7571377754211426,
"step": 350
},
{
"epoch": 0.2962962962962963,
"grad_norm": 3.486557722091675,
"learning_rate": 1.988557277275732e-06,
"loss": 0.5875279903411865,
"step": 352
},
{
"epoch": 0.29797979797979796,
"grad_norm": 3.368520498275757,
"learning_rate": 1.9882902022024683e-06,
"loss": 1.0230705738067627,
"step": 354
},
{
"epoch": 0.2996632996632997,
"grad_norm": 7.633305549621582,
"learning_rate": 1.9880200665957026e-06,
"loss": 1.0808613300323486,
"step": 356
},
{
"epoch": 0.30134680134680136,
"grad_norm": 15.621920585632324,
"learning_rate": 1.9877468713861656e-06,
"loss": 0.9313445687294006,
"step": 358
},
{
"epoch": 0.30303030303030304,
"grad_norm": 3.499727249145508,
"learning_rate": 1.98747061751513e-06,
"loss": 0.8186299800872803,
"step": 360
},
{
"epoch": 0.3047138047138047,
"grad_norm": 3.564624547958374,
"learning_rate": 1.987191305934406e-06,
"loss": 0.9808353185653687,
"step": 362
},
{
"epoch": 0.3063973063973064,
"grad_norm": 2.6821398735046387,
"learning_rate": 1.98690893760634e-06,
"loss": 1.2293064594268799,
"step": 364
},
{
"epoch": 0.30808080808080807,
"grad_norm": 8.49547004699707,
"learning_rate": 1.9866235135038095e-06,
"loss": 0.8337675333023071,
"step": 366
},
{
"epoch": 0.30976430976430974,
"grad_norm": 3.7763280868530273,
"learning_rate": 1.986335034610221e-06,
"loss": 0.8535688519477844,
"step": 368
},
{
"epoch": 0.3114478114478115,
"grad_norm": 6.456183910369873,
"learning_rate": 1.9860435019195054e-06,
"loss": 1.0865236520767212,
"step": 370
},
{
"epoch": 0.31313131313131315,
"grad_norm": 6.974287509918213,
"learning_rate": 1.9857489164361147e-06,
"loss": 1.2327494621276855,
"step": 372
},
{
"epoch": 0.3148148148148148,
"grad_norm": 12.779848098754883,
"learning_rate": 1.9854512791750214e-06,
"loss": 0.6957528591156006,
"step": 374
},
{
"epoch": 0.3164983164983165,
"grad_norm": 3.759835720062256,
"learning_rate": 1.9851505911617097e-06,
"loss": 0.9909141659736633,
"step": 376
},
{
"epoch": 0.3181818181818182,
"grad_norm": 7.0778608322143555,
"learning_rate": 1.984846853432177e-06,
"loss": 1.3244696855545044,
"step": 378
},
{
"epoch": 0.31986531986531985,
"grad_norm": 24.917316436767578,
"learning_rate": 1.9845400670329275e-06,
"loss": 0.7233332991600037,
"step": 380
},
{
"epoch": 0.32154882154882153,
"grad_norm": 10.7407865524292,
"learning_rate": 1.98423023302097e-06,
"loss": 0.9228682518005371,
"step": 382
},
{
"epoch": 0.32323232323232326,
"grad_norm": 4.701694011688232,
"learning_rate": 1.9839173524638115e-06,
"loss": 1.1106748580932617,
"step": 384
},
{
"epoch": 0.32491582491582494,
"grad_norm": 5.9592976570129395,
"learning_rate": 1.9836014264394587e-06,
"loss": 0.7204115390777588,
"step": 386
},
{
"epoch": 0.3265993265993266,
"grad_norm": 24.467937469482422,
"learning_rate": 1.9832824560364093e-06,
"loss": 0.9101235866546631,
"step": 388
},
{
"epoch": 0.3282828282828283,
"grad_norm": 55.19502258300781,
"learning_rate": 1.98296044235365e-06,
"loss": 1.0853596925735474,
"step": 390
},
{
"epoch": 0.32996632996632996,
"grad_norm": 110.57111358642578,
"learning_rate": 1.9826353865006538e-06,
"loss": 0.7398289442062378,
"step": 392
},
{
"epoch": 0.33164983164983164,
"grad_norm": 6.112462520599365,
"learning_rate": 1.9823072895973748e-06,
"loss": 1.3101907968521118,
"step": 394
},
{
"epoch": 0.3333333333333333,
"grad_norm": 18.562759399414062,
"learning_rate": 1.981976152774245e-06,
"loss": 1.1832518577575684,
"step": 396
},
{
"epoch": 0.335016835016835,
"grad_norm": 10.324470520019531,
"learning_rate": 1.98164197717217e-06,
"loss": 0.7631848454475403,
"step": 398
},
{
"epoch": 0.3367003367003367,
"grad_norm": 5.662529468536377,
"learning_rate": 1.9813047639425253e-06,
"loss": 0.9376566410064697,
"step": 400
},
{
"epoch": 0.3383838383838384,
"grad_norm": 17.23822784423828,
"learning_rate": 1.9809645142471528e-06,
"loss": 0.9629780650138855,
"step": 402
},
{
"epoch": 0.3400673400673401,
"grad_norm": 8.00967025756836,
"learning_rate": 1.980621229258355e-06,
"loss": 1.0150327682495117,
"step": 404
},
{
"epoch": 0.34175084175084175,
"grad_norm": 4.659936904907227,
"learning_rate": 1.9802749101588942e-06,
"loss": 1.0681769847869873,
"step": 406
},
{
"epoch": 0.3434343434343434,
"grad_norm": 5.872868061065674,
"learning_rate": 1.9799255581419844e-06,
"loss": 0.9499913454055786,
"step": 408
},
{
"epoch": 0.3451178451178451,
"grad_norm": 4.6081109046936035,
"learning_rate": 1.9795731744112908e-06,
"loss": 0.5379456877708435,
"step": 410
},
{
"epoch": 0.3468013468013468,
"grad_norm": 4.34984016418457,
"learning_rate": 1.9792177601809234e-06,
"loss": 0.8700510263442993,
"step": 412
},
{
"epoch": 0.3484848484848485,
"grad_norm": 12.086810111999512,
"learning_rate": 1.9788593166754343e-06,
"loss": 0.8910826444625854,
"step": 414
},
{
"epoch": 0.3501683501683502,
"grad_norm": 15.385903358459473,
"learning_rate": 1.9784978451298115e-06,
"loss": 1.1716386079788208,
"step": 416
},
{
"epoch": 0.35185185185185186,
"grad_norm": 7.822863578796387,
"learning_rate": 1.9781333467894773e-06,
"loss": 0.687047004699707,
"step": 418
},
{
"epoch": 0.35353535353535354,
"grad_norm": 10.231508255004883,
"learning_rate": 1.9777658229102807e-06,
"loss": 0.8759807348251343,
"step": 420
},
{
"epoch": 0.3552188552188552,
"grad_norm": 10.260309219360352,
"learning_rate": 1.9773952747584976e-06,
"loss": 1.1332191228866577,
"step": 422
},
{
"epoch": 0.3569023569023569,
"grad_norm": 8.660632133483887,
"learning_rate": 1.9770217036108212e-06,
"loss": 0.5898092985153198,
"step": 424
},
{
"epoch": 0.35858585858585856,
"grad_norm": 24.724945068359375,
"learning_rate": 1.9766451107543614e-06,
"loss": 0.9762297868728638,
"step": 426
},
{
"epoch": 0.3602693602693603,
"grad_norm": 10.698787689208984,
"learning_rate": 1.9762654974866396e-06,
"loss": 0.7858309149742126,
"step": 428
},
{
"epoch": 0.36195286195286197,
"grad_norm": 9.971443176269531,
"learning_rate": 1.975882865115583e-06,
"loss": 1.2292566299438477,
"step": 430
},
{
"epoch": 0.36363636363636365,
"grad_norm": 7.011922359466553,
"learning_rate": 1.9754972149595204e-06,
"loss": 0.9748165607452393,
"step": 432
},
{
"epoch": 0.3653198653198653,
"grad_norm": 12.33168888092041,
"learning_rate": 1.97510854834718e-06,
"loss": 0.8448182940483093,
"step": 434
},
{
"epoch": 0.367003367003367,
"grad_norm": 2.4483745098114014,
"learning_rate": 1.9747168666176813e-06,
"loss": 1.008624792098999,
"step": 436
},
{
"epoch": 0.3686868686868687,
"grad_norm": 10.966385841369629,
"learning_rate": 1.9743221711205323e-06,
"loss": 1.0692952871322632,
"step": 438
},
{
"epoch": 0.37037037037037035,
"grad_norm": 2.965273141860962,
"learning_rate": 1.9739244632156256e-06,
"loss": 0.9337837100028992,
"step": 440
},
{
"epoch": 0.3720538720538721,
"grad_norm": 12.18703556060791,
"learning_rate": 1.973523744273232e-06,
"loss": 0.9473227262496948,
"step": 442
},
{
"epoch": 0.37373737373737376,
"grad_norm": 8.538522720336914,
"learning_rate": 1.973120015673997e-06,
"loss": 0.7716883420944214,
"step": 444
},
{
"epoch": 0.37542087542087543,
"grad_norm": 11.410622596740723,
"learning_rate": 1.9727132788089354e-06,
"loss": 0.6292431354522705,
"step": 446
},
{
"epoch": 0.3771043771043771,
"grad_norm": 3.9945926666259766,
"learning_rate": 1.972303535079427e-06,
"loss": 1.1218082904815674,
"step": 448
},
{
"epoch": 0.3787878787878788,
"grad_norm": 19.375045776367188,
"learning_rate": 1.971890785897211e-06,
"loss": 1.007505178451538,
"step": 450
},
{
"epoch": 0.38047138047138046,
"grad_norm": 3.713459014892578,
"learning_rate": 1.9714750326843825e-06,
"loss": 0.7216253280639648,
"step": 452
},
{
"epoch": 0.38215488215488214,
"grad_norm": 6.826941013336182,
"learning_rate": 1.9710562768733857e-06,
"loss": 0.9892054796218872,
"step": 454
},
{
"epoch": 0.3838383838383838,
"grad_norm": 7.63702392578125,
"learning_rate": 1.9706345199070107e-06,
"loss": 0.7905744314193726,
"step": 456
},
{
"epoch": 0.38552188552188554,
"grad_norm": 11.529894828796387,
"learning_rate": 1.970209763238388e-06,
"loss": 0.9695171117782593,
"step": 458
},
{
"epoch": 0.3872053872053872,
"grad_norm": 2.9292163848876953,
"learning_rate": 1.969782008330983e-06,
"loss": 1.1221948862075806,
"step": 460
},
{
"epoch": 0.3888888888888889,
"grad_norm": 4.672982215881348,
"learning_rate": 1.969351256658591e-06,
"loss": 0.8763028979301453,
"step": 462
},
{
"epoch": 0.39057239057239057,
"grad_norm": 4.81404972076416,
"learning_rate": 1.968917509705333e-06,
"loss": 0.8340336680412292,
"step": 464
},
{
"epoch": 0.39225589225589225,
"grad_norm": 19.125089645385742,
"learning_rate": 1.9684807689656497e-06,
"loss": 0.9119417071342468,
"step": 466
},
{
"epoch": 0.3939393939393939,
"grad_norm": 2.594858407974243,
"learning_rate": 1.9680410359442972e-06,
"loss": 0.9458074569702148,
"step": 468
},
{
"epoch": 0.3956228956228956,
"grad_norm": 3.8974621295928955,
"learning_rate": 1.9675983121563397e-06,
"loss": 0.9553569555282593,
"step": 470
},
{
"epoch": 0.39730639730639733,
"grad_norm": 6.4163641929626465,
"learning_rate": 1.9671525991271478e-06,
"loss": 0.7942986488342285,
"step": 472
},
{
"epoch": 0.398989898989899,
"grad_norm": 3.718247890472412,
"learning_rate": 1.9667038983923902e-06,
"loss": 0.9940693378448486,
"step": 474
},
{
"epoch": 0.4006734006734007,
"grad_norm": 25.65456199645996,
"learning_rate": 1.9662522114980296e-06,
"loss": 0.7515483498573303,
"step": 476
},
{
"epoch": 0.40235690235690236,
"grad_norm": 3.7314107418060303,
"learning_rate": 1.965797540000318e-06,
"loss": 0.9622472524642944,
"step": 478
},
{
"epoch": 0.40404040404040403,
"grad_norm": 16.488338470458984,
"learning_rate": 1.9653398854657887e-06,
"loss": 1.041235089302063,
"step": 480
},
{
"epoch": 0.4057239057239057,
"grad_norm": 8.276439666748047,
"learning_rate": 1.9648792494712553e-06,
"loss": 1.0389721393585205,
"step": 482
},
{
"epoch": 0.4074074074074074,
"grad_norm": 10.357524871826172,
"learning_rate": 1.9644156336038024e-06,
"loss": 0.8473480343818665,
"step": 484
},
{
"epoch": 0.4090909090909091,
"grad_norm": 12.934167861938477,
"learning_rate": 1.9639490394607813e-06,
"loss": 0.8664846420288086,
"step": 486
},
{
"epoch": 0.4107744107744108,
"grad_norm": 2.63865327835083,
"learning_rate": 1.9634794686498055e-06,
"loss": 1.0735490322113037,
"step": 488
},
{
"epoch": 0.41245791245791247,
"grad_norm": 9.611379623413086,
"learning_rate": 1.9630069227887444e-06,
"loss": 1.097601294517517,
"step": 490
},
{
"epoch": 0.41414141414141414,
"grad_norm": 37.54718780517578,
"learning_rate": 1.9625314035057167e-06,
"loss": 1.0461905002593994,
"step": 492
},
{
"epoch": 0.4158249158249158,
"grad_norm": 5.95384407043457,
"learning_rate": 1.9620529124390863e-06,
"loss": 0.9309274554252625,
"step": 494
},
{
"epoch": 0.4175084175084175,
"grad_norm": 5.1661763191223145,
"learning_rate": 1.9615714512374567e-06,
"loss": 1.0628364086151123,
"step": 496
},
{
"epoch": 0.41919191919191917,
"grad_norm": 4.157014846801758,
"learning_rate": 1.9610870215596643e-06,
"loss": 1.0677950382232666,
"step": 498
},
{
"epoch": 0.4208754208754209,
"grad_norm": 6.916998863220215,
"learning_rate": 1.960599625074773e-06,
"loss": 0.8103325366973877,
"step": 500
},
{
"epoch": 0.4225589225589226,
"grad_norm": 6.891815185546875,
"learning_rate": 1.9601092634620687e-06,
"loss": 0.6272333264350891,
"step": 502
},
{
"epoch": 0.42424242424242425,
"grad_norm": 9.089258193969727,
"learning_rate": 1.9596159384110535e-06,
"loss": 0.8941874504089355,
"step": 504
},
{
"epoch": 0.42592592592592593,
"grad_norm": 16.94425392150879,
"learning_rate": 1.95911965162144e-06,
"loss": 0.938546359539032,
"step": 506
},
{
"epoch": 0.4276094276094276,
"grad_norm": 15.095925331115723,
"learning_rate": 1.958620404803145e-06,
"loss": 1.293353796005249,
"step": 508
},
{
"epoch": 0.4292929292929293,
"grad_norm": 3.3025577068328857,
"learning_rate": 1.9581181996762834e-06,
"loss": 1.0367740392684937,
"step": 510
},
{
"epoch": 0.43097643097643096,
"grad_norm": 3.0691745281219482,
"learning_rate": 1.9576130379711634e-06,
"loss": 1.178546667098999,
"step": 512
},
{
"epoch": 0.43265993265993263,
"grad_norm": 3.2468979358673096,
"learning_rate": 1.95710492142828e-06,
"loss": 1.115210771560669,
"step": 514
},
{
"epoch": 0.43434343434343436,
"grad_norm": 12.401965141296387,
"learning_rate": 1.956593851798308e-06,
"loss": 1.0290696620941162,
"step": 516
},
{
"epoch": 0.43602693602693604,
"grad_norm": 8.208135604858398,
"learning_rate": 1.9560798308420974e-06,
"loss": 1.0394536256790161,
"step": 518
},
{
"epoch": 0.4377104377104377,
"grad_norm": 15.533670425415039,
"learning_rate": 1.955562860330667e-06,
"loss": 0.9136192798614502,
"step": 520
},
{
"epoch": 0.4393939393939394,
"grad_norm": 3.0875625610351562,
"learning_rate": 1.9550429420451973e-06,
"loss": 0.7975887060165405,
"step": 522
},
{
"epoch": 0.44107744107744107,
"grad_norm": 8.5232572555542,
"learning_rate": 1.954520077777026e-06,
"loss": 1.1077611446380615,
"step": 524
},
{
"epoch": 0.44276094276094274,
"grad_norm": 11.362956047058105,
"learning_rate": 1.9539942693276405e-06,
"loss": 0.7790743112564087,
"step": 526
},
{
"epoch": 0.4444444444444444,
"grad_norm": 2.6764779090881348,
"learning_rate": 1.9534655185086717e-06,
"loss": 1.1893084049224854,
"step": 528
},
{
"epoch": 0.44612794612794615,
"grad_norm": 11.054378509521484,
"learning_rate": 1.9529338271418886e-06,
"loss": 0.8206809759140015,
"step": 530
},
{
"epoch": 0.4478114478114478,
"grad_norm": 15.93736743927002,
"learning_rate": 1.952399197059192e-06,
"loss": 0.8338401317596436,
"step": 532
},
{
"epoch": 0.4494949494949495,
"grad_norm": 5.404129505157471,
"learning_rate": 1.9518616301026077e-06,
"loss": 0.9456153512001038,
"step": 534
},
{
"epoch": 0.4511784511784512,
"grad_norm": 4.291036128997803,
"learning_rate": 1.9513211281242795e-06,
"loss": 1.2254921197891235,
"step": 536
},
{
"epoch": 0.45286195286195285,
"grad_norm": 7.2202582359313965,
"learning_rate": 1.9507776929864643e-06,
"loss": 1.092686653137207,
"step": 538
},
{
"epoch": 0.45454545454545453,
"grad_norm": 8.635713577270508,
"learning_rate": 1.950231326561525e-06,
"loss": 0.8675233125686646,
"step": 540
},
{
"epoch": 0.4562289562289562,
"grad_norm": 8.679670333862305,
"learning_rate": 1.9496820307319237e-06,
"loss": 1.0159896612167358,
"step": 542
},
{
"epoch": 0.45791245791245794,
"grad_norm": 3.453657865524292,
"learning_rate": 1.9491298073902157e-06,
"loss": 1.118143081665039,
"step": 544
},
{
"epoch": 0.4595959595959596,
"grad_norm": 7.604466438293457,
"learning_rate": 1.9485746584390426e-06,
"loss": 1.1383062601089478,
"step": 546
},
{
"epoch": 0.4612794612794613,
"grad_norm": 10.454069137573242,
"learning_rate": 1.948016585791127e-06,
"loss": 1.3462685346603394,
"step": 548
},
{
"epoch": 0.46296296296296297,
"grad_norm": 7.511162757873535,
"learning_rate": 1.9474555913692627e-06,
"loss": 0.8798332214355469,
"step": 550
},
{
"epoch": 0.46464646464646464,
"grad_norm": 22.986238479614258,
"learning_rate": 1.946891677106312e-06,
"loss": 0.8471826314926147,
"step": 552
},
{
"epoch": 0.4663299663299663,
"grad_norm": 4.494133949279785,
"learning_rate": 1.946324844945197e-06,
"loss": 1.0384173393249512,
"step": 554
},
{
"epoch": 0.468013468013468,
"grad_norm": 9.850350379943848,
"learning_rate": 1.9457550968388928e-06,
"loss": 0.7141643166542053,
"step": 556
},
{
"epoch": 0.4696969696969697,
"grad_norm": 6.887972831726074,
"learning_rate": 1.9451824347504213e-06,
"loss": 1.190050721168518,
"step": 558
},
{
"epoch": 0.4713804713804714,
"grad_norm": 5.237252235412598,
"learning_rate": 1.944606860652845e-06,
"loss": 0.41058096289634705,
"step": 560
},
{
"epoch": 0.4730639730639731,
"grad_norm": 15.578932762145996,
"learning_rate": 1.944028376529258e-06,
"loss": 0.598914384841919,
"step": 562
},
{
"epoch": 0.47474747474747475,
"grad_norm": 3.727078437805176,
"learning_rate": 1.943446984372782e-06,
"loss": 1.2833001613616943,
"step": 564
},
{
"epoch": 0.4764309764309764,
"grad_norm": 8.145559310913086,
"learning_rate": 1.942862686186557e-06,
"loss": 1.1502578258514404,
"step": 566
},
{
"epoch": 0.4781144781144781,
"grad_norm": 8.36186408996582,
"learning_rate": 1.9422754839837366e-06,
"loss": 0.45712798833847046,
"step": 568
},
{
"epoch": 0.4797979797979798,
"grad_norm": 32.920475006103516,
"learning_rate": 1.9416853797874797e-06,
"loss": 1.1332796812057495,
"step": 570
},
{
"epoch": 0.48148148148148145,
"grad_norm": 17.55156135559082,
"learning_rate": 1.941092375630943e-06,
"loss": 0.6961038112640381,
"step": 572
},
{
"epoch": 0.4831649831649832,
"grad_norm": 4.492574214935303,
"learning_rate": 1.9404964735572754e-06,
"loss": 0.9653905630111694,
"step": 574
},
{
"epoch": 0.48484848484848486,
"grad_norm": 6.348426818847656,
"learning_rate": 1.939897675619611e-06,
"loss": 0.871944785118103,
"step": 576
},
{
"epoch": 0.48653198653198654,
"grad_norm": 25.369014739990234,
"learning_rate": 1.9392959838810597e-06,
"loss": 1.0709469318389893,
"step": 578
},
{
"epoch": 0.4882154882154882,
"grad_norm": 10.82548999786377,
"learning_rate": 1.9386914004147034e-06,
"loss": 0.7998636960983276,
"step": 580
},
{
"epoch": 0.4898989898989899,
"grad_norm": 10.758012771606445,
"learning_rate": 1.938083927303586e-06,
"loss": 1.3598113059997559,
"step": 582
},
{
"epoch": 0.49158249158249157,
"grad_norm": 6.756187915802002,
"learning_rate": 1.937473566640708e-06,
"loss": 0.9948703050613403,
"step": 584
},
{
"epoch": 0.49326599326599324,
"grad_norm": 2.756861686706543,
"learning_rate": 1.9368603205290196e-06,
"loss": 0.8475466966629028,
"step": 586
},
{
"epoch": 0.494949494949495,
"grad_norm": 5.148032188415527,
"learning_rate": 1.9362441910814105e-06,
"loss": 0.6347664594650269,
"step": 588
},
{
"epoch": 0.49663299663299665,
"grad_norm": 2.980475425720215,
"learning_rate": 1.935625180420706e-06,
"loss": 1.1008853912353516,
"step": 590
},
{
"epoch": 0.4983164983164983,
"grad_norm": 3.5861027240753174,
"learning_rate": 1.935003290679659e-06,
"loss": 1.1105575561523438,
"step": 592
},
{
"epoch": 0.5,
"grad_norm": 37.69801712036133,
"learning_rate": 1.934378524000941e-06,
"loss": 0.7997324466705322,
"step": 594
},
{
"epoch": 0.5016835016835017,
"grad_norm": 10.022683143615723,
"learning_rate": 1.933750882537136e-06,
"loss": 0.9395183324813843,
"step": 596
},
{
"epoch": 0.5033670033670034,
"grad_norm": 3.6454007625579834,
"learning_rate": 1.9331203684507333e-06,
"loss": 1.2922556400299072,
"step": 598
},
{
"epoch": 0.5050505050505051,
"grad_norm": 11.494460105895996,
"learning_rate": 1.9324869839141184e-06,
"loss": 0.7769290804862976,
"step": 600
},
{
"epoch": 0.5067340067340067,
"grad_norm": 101.31135559082031,
"learning_rate": 1.9318507311095686e-06,
"loss": 1.0425605773925781,
"step": 602
},
{
"epoch": 0.5084175084175084,
"grad_norm": 29.326383590698242,
"learning_rate": 1.9312116122292414e-06,
"loss": 1.0084577798843384,
"step": 604
},
{
"epoch": 0.51010101010101,
"grad_norm": 4.6560163497924805,
"learning_rate": 1.9305696294751707e-06,
"loss": 1.0687224864959717,
"step": 606
},
{
"epoch": 0.5117845117845118,
"grad_norm": 31.829082489013672,
"learning_rate": 1.9299247850592575e-06,
"loss": 0.5714974999427795,
"step": 608
},
{
"epoch": 0.5134680134680135,
"grad_norm": 3.3935041427612305,
"learning_rate": 1.9292770812032626e-06,
"loss": 0.9293146133422852,
"step": 610
},
{
"epoch": 0.5151515151515151,
"grad_norm": 35.04014587402344,
"learning_rate": 1.9286265201387966e-06,
"loss": 0.8598051071166992,
"step": 612
},
{
"epoch": 0.5168350168350169,
"grad_norm": 5.506503105163574,
"learning_rate": 1.9279731041073177e-06,
"loss": 0.7148240804672241,
"step": 614
},
{
"epoch": 0.5185185185185185,
"grad_norm": 7.014071941375732,
"learning_rate": 1.9273168353601185e-06,
"loss": 1.0927050113677979,
"step": 616
},
{
"epoch": 0.5202020202020202,
"grad_norm": 11.175944328308105,
"learning_rate": 1.9266577161583207e-06,
"loss": 1.0155811309814453,
"step": 618
},
{
"epoch": 0.5218855218855218,
"grad_norm": 4.795597076416016,
"learning_rate": 1.925995748772868e-06,
"loss": 0.9794735312461853,
"step": 620
},
{
"epoch": 0.5235690235690236,
"grad_norm": 24.483413696289062,
"learning_rate": 1.925330935484516e-06,
"loss": 1.045680284500122,
"step": 622
},
{
"epoch": 0.5252525252525253,
"grad_norm": 2.9763712882995605,
"learning_rate": 1.9246632785838263e-06,
"loss": 0.7627449631690979,
"step": 624
},
{
"epoch": 0.5269360269360269,
"grad_norm": 19.479745864868164,
"learning_rate": 1.9239927803711578e-06,
"loss": 0.945065975189209,
"step": 626
},
{
"epoch": 0.5286195286195287,
"grad_norm": 2.6288349628448486,
"learning_rate": 1.923319443156659e-06,
"loss": 0.839026153087616,
"step": 628
},
{
"epoch": 0.5303030303030303,
"grad_norm": 14.550789833068848,
"learning_rate": 1.92264326926026e-06,
"loss": 0.7562347054481506,
"step": 630
},
{
"epoch": 0.531986531986532,
"grad_norm": 7.969823360443115,
"learning_rate": 1.9219642610116647e-06,
"loss": 1.1040418148040771,
"step": 632
},
{
"epoch": 0.5336700336700336,
"grad_norm": 9.72048568725586,
"learning_rate": 1.9212824207503415e-06,
"loss": 0.9238873720169067,
"step": 634
},
{
"epoch": 0.5353535353535354,
"grad_norm": 4.213377475738525,
"learning_rate": 1.920597750825517e-06,
"loss": 0.8101857900619507,
"step": 636
},
{
"epoch": 0.5370370370370371,
"grad_norm": 13.104752540588379,
"learning_rate": 1.919910253596168e-06,
"loss": 0.9694643020629883,
"step": 638
},
{
"epoch": 0.5387205387205387,
"grad_norm": 10.729632377624512,
"learning_rate": 1.919219931431011e-06,
"loss": 0.8188080191612244,
"step": 640
},
{
"epoch": 0.5404040404040404,
"grad_norm": 4.642938613891602,
"learning_rate": 1.918526786708497e-06,
"loss": 0.944012463092804,
"step": 642
},
{
"epoch": 0.5420875420875421,
"grad_norm": 4.087347984313965,
"learning_rate": 1.9178308218168e-06,
"loss": 0.8914910554885864,
"step": 644
},
{
"epoch": 0.5437710437710438,
"grad_norm": 3.8000528812408447,
"learning_rate": 1.9171320391538132e-06,
"loss": 0.893518328666687,
"step": 646
},
{
"epoch": 0.5454545454545454,
"grad_norm": 9.262425422668457,
"learning_rate": 1.9164304411271364e-06,
"loss": 0.984040379524231,
"step": 648
},
{
"epoch": 0.5471380471380471,
"grad_norm": 10.015108108520508,
"learning_rate": 1.9157260301540697e-06,
"loss": 1.140836477279663,
"step": 650
},
{
"epoch": 0.5488215488215489,
"grad_norm": 160.21282958984375,
"learning_rate": 1.9150188086616055e-06,
"loss": 1.0449649095535278,
"step": 652
},
{
"epoch": 0.5505050505050505,
"grad_norm": 4.650694847106934,
"learning_rate": 1.91430877908642e-06,
"loss": 1.0726298093795776,
"step": 654
},
{
"epoch": 0.5521885521885522,
"grad_norm": 11.116467475891113,
"learning_rate": 1.9135959438748626e-06,
"loss": 0.9272226095199585,
"step": 656
},
{
"epoch": 0.5538720538720538,
"grad_norm": 7.265547752380371,
"learning_rate": 1.9128803054829515e-06,
"loss": 0.7893900871276855,
"step": 658
},
{
"epoch": 0.5555555555555556,
"grad_norm": 35.09156799316406,
"learning_rate": 1.912161866376362e-06,
"loss": 0.7798557281494141,
"step": 660
},
{
"epoch": 0.5572390572390572,
"grad_norm": 3.4387574195861816,
"learning_rate": 1.9114406290304186e-06,
"loss": 1.0308525562286377,
"step": 662
},
{
"epoch": 0.5589225589225589,
"grad_norm": 3.3560092449188232,
"learning_rate": 1.910716595930088e-06,
"loss": 1.0922589302062988,
"step": 664
},
{
"epoch": 0.5606060606060606,
"grad_norm": 12.50266170501709,
"learning_rate": 1.9099897695699684e-06,
"loss": 0.4920412600040436,
"step": 666
},
{
"epoch": 0.5622895622895623,
"grad_norm": 5.19976282119751,
"learning_rate": 1.9092601524542828e-06,
"loss": 0.6655771136283875,
"step": 668
},
{
"epoch": 0.563973063973064,
"grad_norm": 17.65725326538086,
"learning_rate": 1.9085277470968692e-06,
"loss": 1.0704545974731445,
"step": 670
},
{
"epoch": 0.5656565656565656,
"grad_norm": 13.295573234558105,
"learning_rate": 1.907792556021171e-06,
"loss": 0.5930483341217041,
"step": 672
},
{
"epoch": 0.5673400673400674,
"grad_norm": 5.582085609436035,
"learning_rate": 1.9070545817602328e-06,
"loss": 0.5818225145339966,
"step": 674
},
{
"epoch": 0.569023569023569,
"grad_norm": 7.926098823547363,
"learning_rate": 1.9063138268566851e-06,
"loss": 0.6757692098617554,
"step": 676
},
{
"epoch": 0.5707070707070707,
"grad_norm": 9.610929489135742,
"learning_rate": 1.9055702938627407e-06,
"loss": 1.3059725761413574,
"step": 678
},
{
"epoch": 0.5723905723905723,
"grad_norm": 14.765951156616211,
"learning_rate": 1.9048239853401833e-06,
"loss": 0.42610985040664673,
"step": 680
},
{
"epoch": 0.5740740740740741,
"grad_norm": 6.197120189666748,
"learning_rate": 1.9040749038603602e-06,
"loss": 1.0255128145217896,
"step": 682
},
{
"epoch": 0.5757575757575758,
"grad_norm": 6.4059038162231445,
"learning_rate": 1.9033230520041719e-06,
"loss": 1.1382319927215576,
"step": 684
},
{
"epoch": 0.5774410774410774,
"grad_norm": 6.532130241394043,
"learning_rate": 1.9025684323620645e-06,
"loss": 1.1159263849258423,
"step": 686
},
{
"epoch": 0.5791245791245792,
"grad_norm": 2.4945201873779297,
"learning_rate": 1.9018110475340203e-06,
"loss": 0.8307312726974487,
"step": 688
},
{
"epoch": 0.5808080808080808,
"grad_norm": 20.23617935180664,
"learning_rate": 1.9010509001295485e-06,
"loss": 0.7440475821495056,
"step": 690
},
{
"epoch": 0.5824915824915825,
"grad_norm": 4.1981072425842285,
"learning_rate": 1.9002879927676767e-06,
"loss": 0.8382600545883179,
"step": 692
},
{
"epoch": 0.5841750841750841,
"grad_norm": 2.907876491546631,
"learning_rate": 1.8995223280769424e-06,
"loss": 0.9814774990081787,
"step": 694
},
{
"epoch": 0.5858585858585859,
"grad_norm": 5.83011531829834,
"learning_rate": 1.8987539086953819e-06,
"loss": 0.8996963500976562,
"step": 696
},
{
"epoch": 0.5875420875420876,
"grad_norm": 8.185150146484375,
"learning_rate": 1.8979827372705233e-06,
"loss": 0.8781136274337769,
"step": 698
},
{
"epoch": 0.5892255892255892,
"grad_norm": 9.394926071166992,
"learning_rate": 1.8972088164593771e-06,
"loss": 0.8234498500823975,
"step": 700
},
{
"epoch": 0.5909090909090909,
"grad_norm": 15.942888259887695,
"learning_rate": 1.896432148928426e-06,
"loss": 0.9446474313735962,
"step": 702
},
{
"epoch": 0.5925925925925926,
"grad_norm": 4.5268330574035645,
"learning_rate": 1.895652737353616e-06,
"loss": 1.0645607709884644,
"step": 704
},
{
"epoch": 0.5942760942760943,
"grad_norm": 6.5960612297058105,
"learning_rate": 1.8948705844203482e-06,
"loss": 0.9992242455482483,
"step": 706
},
{
"epoch": 0.5959595959595959,
"grad_norm": 25.13721466064453,
"learning_rate": 1.8940856928234689e-06,
"loss": 0.746535062789917,
"step": 708
},
{
"epoch": 0.5976430976430976,
"grad_norm": 6.828306674957275,
"learning_rate": 1.8932980652672597e-06,
"loss": 0.8305199146270752,
"step": 710
},
{
"epoch": 0.5993265993265994,
"grad_norm": 5.863089561462402,
"learning_rate": 1.8925077044654288e-06,
"loss": 1.1452956199645996,
"step": 712
},
{
"epoch": 0.601010101010101,
"grad_norm": 3.158170700073242,
"learning_rate": 1.8917146131411015e-06,
"loss": 1.0598926544189453,
"step": 714
},
{
"epoch": 0.6026936026936027,
"grad_norm": 6.218857288360596,
"learning_rate": 1.8909187940268115e-06,
"loss": 0.7409163117408752,
"step": 716
},
{
"epoch": 0.6043771043771043,
"grad_norm": 6.748631000518799,
"learning_rate": 1.89012024986449e-06,
"loss": 0.9013140201568604,
"step": 718
},
{
"epoch": 0.6060606060606061,
"grad_norm": 4.563135623931885,
"learning_rate": 1.8893189834054586e-06,
"loss": 0.9499297738075256,
"step": 720
},
{
"epoch": 0.6077441077441077,
"grad_norm": 12.914100646972656,
"learning_rate": 1.8885149974104164e-06,
"loss": 0.9684711694717407,
"step": 722
},
{
"epoch": 0.6094276094276094,
"grad_norm": 16.68248748779297,
"learning_rate": 1.8877082946494339e-06,
"loss": 0.8916200995445251,
"step": 724
},
{
"epoch": 0.6111111111111112,
"grad_norm": 31.8973388671875,
"learning_rate": 1.8868988779019414e-06,
"loss": 0.9836832284927368,
"step": 726
},
{
"epoch": 0.6127946127946128,
"grad_norm": 38.546356201171875,
"learning_rate": 1.8860867499567203e-06,
"loss": 0.8979325294494629,
"step": 728
},
{
"epoch": 0.6144781144781145,
"grad_norm": 3.1298513412475586,
"learning_rate": 1.885271913611893e-06,
"loss": 1.1511611938476562,
"step": 730
},
{
"epoch": 0.6161616161616161,
"grad_norm": 4.0303263664245605,
"learning_rate": 1.8844543716749134e-06,
"loss": 1.0997979640960693,
"step": 732
},
{
"epoch": 0.6178451178451179,
"grad_norm": 4.650604724884033,
"learning_rate": 1.8836341269625578e-06,
"loss": 0.7802401781082153,
"step": 734
},
{
"epoch": 0.6195286195286195,
"grad_norm": 8.960386276245117,
"learning_rate": 1.882811182300914e-06,
"loss": 0.8063424229621887,
"step": 736
},
{
"epoch": 0.6212121212121212,
"grad_norm": 20.323410034179688,
"learning_rate": 1.881985540525373e-06,
"loss": 0.689705491065979,
"step": 738
},
{
"epoch": 0.622895622895623,
"grad_norm": 4.956573963165283,
"learning_rate": 1.8811572044806178e-06,
"loss": 1.2354564666748047,
"step": 740
},
{
"epoch": 0.6245791245791246,
"grad_norm": 4.285037040710449,
"learning_rate": 1.8803261770206149e-06,
"loss": 1.0013043880462646,
"step": 742
},
{
"epoch": 0.6262626262626263,
"grad_norm": 2.563471794128418,
"learning_rate": 1.8794924610086031e-06,
"loss": 1.2029197216033936,
"step": 744
},
{
"epoch": 0.6279461279461279,
"grad_norm": 2.987870216369629,
"learning_rate": 1.8786560593170854e-06,
"loss": 0.9561195969581604,
"step": 746
},
{
"epoch": 0.6296296296296297,
"grad_norm": 3.021315336227417,
"learning_rate": 1.877816974827817e-06,
"loss": 1.202516794204712,
"step": 748
},
{
"epoch": 0.6313131313131313,
"grad_norm": 3.505037307739258,
"learning_rate": 1.8769752104317973e-06,
"loss": 1.2894848585128784,
"step": 750
},
{
"epoch": 0.632996632996633,
"grad_norm": 8.464410781860352,
"learning_rate": 1.8761307690292589e-06,
"loss": 0.7271798849105835,
"step": 752
},
{
"epoch": 0.6346801346801347,
"grad_norm": 26.4637508392334,
"learning_rate": 1.875283653529658e-06,
"loss": 0.9941682815551758,
"step": 754
},
{
"epoch": 0.6363636363636364,
"grad_norm": 2.6587889194488525,
"learning_rate": 1.874433866851663e-06,
"loss": 0.7514116168022156,
"step": 756
},
{
"epoch": 0.6380471380471381,
"grad_norm": 10.891627311706543,
"learning_rate": 1.8735814119231475e-06,
"loss": 0.8671576976776123,
"step": 758
},
{
"epoch": 0.6397306397306397,
"grad_norm": 25.072734832763672,
"learning_rate": 1.872726291681177e-06,
"loss": 0.6143717169761658,
"step": 760
},
{
"epoch": 0.6414141414141414,
"grad_norm": 4.057854175567627,
"learning_rate": 1.8718685090720004e-06,
"loss": 0.46186384558677673,
"step": 762
},
{
"epoch": 0.6430976430976431,
"grad_norm": 10.258670806884766,
"learning_rate": 1.8710080670510402e-06,
"loss": 1.0092180967330933,
"step": 764
},
{
"epoch": 0.6447811447811448,
"grad_norm": 4.200110912322998,
"learning_rate": 1.8701449685828806e-06,
"loss": 1.0899416208267212,
"step": 766
},
{
"epoch": 0.6464646464646465,
"grad_norm": 10.581267356872559,
"learning_rate": 1.8692792166412595e-06,
"loss": 0.7667125463485718,
"step": 768
},
{
"epoch": 0.6481481481481481,
"grad_norm": 5.673297882080078,
"learning_rate": 1.8684108142090562e-06,
"loss": 0.7934967279434204,
"step": 770
},
{
"epoch": 0.6498316498316499,
"grad_norm": 3.9210774898529053,
"learning_rate": 1.8675397642782827e-06,
"loss": 0.7912408113479614,
"step": 772
},
{
"epoch": 0.6515151515151515,
"grad_norm": 12.99809455871582,
"learning_rate": 1.8666660698500726e-06,
"loss": 0.6966930627822876,
"step": 774
},
{
"epoch": 0.6531986531986532,
"grad_norm": 2.608152389526367,
"learning_rate": 1.8657897339346707e-06,
"loss": 0.9161090850830078,
"step": 776
},
{
"epoch": 0.6548821548821548,
"grad_norm": 4.8470282554626465,
"learning_rate": 1.8649107595514226e-06,
"loss": 1.050070881843567,
"step": 778
},
{
"epoch": 0.6565656565656566,
"grad_norm": 38.622154235839844,
"learning_rate": 1.8640291497287654e-06,
"loss": 0.948337197303772,
"step": 780
},
{
"epoch": 0.6582491582491582,
"grad_norm": 19.695106506347656,
"learning_rate": 1.8631449075042156e-06,
"loss": 1.065544605255127,
"step": 782
},
{
"epoch": 0.6599326599326599,
"grad_norm": 6.196758270263672,
"learning_rate": 1.8622580359243601e-06,
"loss": 0.9903167486190796,
"step": 784
},
{
"epoch": 0.6616161616161617,
"grad_norm": 11.652655601501465,
"learning_rate": 1.8613685380448441e-06,
"loss": 1.0705502033233643,
"step": 786
},
{
"epoch": 0.6632996632996633,
"grad_norm": 21.967121124267578,
"learning_rate": 1.8604764169303626e-06,
"loss": 0.8703781366348267,
"step": 788
},
{
"epoch": 0.664983164983165,
"grad_norm": 2.8076608180999756,
"learning_rate": 1.8595816756546477e-06,
"loss": 0.9413682222366333,
"step": 790
},
{
"epoch": 0.6666666666666666,
"grad_norm": 12.699344635009766,
"learning_rate": 1.8586843173004598e-06,
"loss": 0.9941300749778748,
"step": 792
},
{
"epoch": 0.6683501683501684,
"grad_norm": 2.5356881618499756,
"learning_rate": 1.8577843449595763e-06,
"loss": 0.6315573453903198,
"step": 794
},
{
"epoch": 0.67003367003367,
"grad_norm": 3.684738874435425,
"learning_rate": 1.85688176173278e-06,
"loss": 0.9797836542129517,
"step": 796
},
{
"epoch": 0.6717171717171717,
"grad_norm": 4.553958415985107,
"learning_rate": 1.8559765707298502e-06,
"loss": 1.0133525133132935,
"step": 798
},
{
"epoch": 0.6734006734006734,
"grad_norm": 5.8083367347717285,
"learning_rate": 1.8550687750695509e-06,
"loss": 0.635034441947937,
"step": 800
},
{
"epoch": 0.6750841750841751,
"grad_norm": 2.6168251037597656,
"learning_rate": 1.8541583778796196e-06,
"loss": 0.9916131496429443,
"step": 802
},
{
"epoch": 0.6767676767676768,
"grad_norm": 10.899927139282227,
"learning_rate": 1.8532453822967584e-06,
"loss": 0.7682900428771973,
"step": 804
},
{
"epoch": 0.6784511784511784,
"grad_norm": 11.195059776306152,
"learning_rate": 1.8523297914666207e-06,
"loss": 0.6411112546920776,
"step": 806
},
{
"epoch": 0.6801346801346801,
"grad_norm": 8.76089859008789,
"learning_rate": 1.8514116085438027e-06,
"loss": 1.0669599771499634,
"step": 808
},
{
"epoch": 0.6818181818181818,
"grad_norm": 2.9080264568328857,
"learning_rate": 1.8504908366918302e-06,
"loss": 0.9828901886940002,
"step": 810
},
{
"epoch": 0.6835016835016835,
"grad_norm": 4.848678112030029,
"learning_rate": 1.84956747908315e-06,
"loss": 1.1542444229125977,
"step": 812
},
{
"epoch": 0.6851851851851852,
"grad_norm": 6.960413932800293,
"learning_rate": 1.8486415388991173e-06,
"loss": 0.5982141494750977,
"step": 814
},
{
"epoch": 0.6868686868686869,
"grad_norm": 2.6384944915771484,
"learning_rate": 1.8477130193299863e-06,
"loss": 1.1131889820098877,
"step": 816
},
{
"epoch": 0.6885521885521886,
"grad_norm": 9.800881385803223,
"learning_rate": 1.846781923574897e-06,
"loss": 0.7944687604904175,
"step": 818
},
{
"epoch": 0.6902356902356902,
"grad_norm": 40.63787078857422,
"learning_rate": 1.8458482548418661e-06,
"loss": 0.7440886497497559,
"step": 820
},
{
"epoch": 0.6919191919191919,
"grad_norm": 3.366387367248535,
"learning_rate": 1.8449120163477753e-06,
"loss": 0.7828149199485779,
"step": 822
},
{
"epoch": 0.6936026936026936,
"grad_norm": 4.786665916442871,
"learning_rate": 1.8439732113183607e-06,
"loss": 0.8565751314163208,
"step": 824
},
{
"epoch": 0.6952861952861953,
"grad_norm": 9.01762866973877,
"learning_rate": 1.8430318429881997e-06,
"loss": 0.8942912817001343,
"step": 826
},
{
"epoch": 0.696969696969697,
"grad_norm": 2.231179714202881,
"learning_rate": 1.8420879146007025e-06,
"loss": 0.8027513027191162,
"step": 828
},
{
"epoch": 0.6986531986531986,
"grad_norm": 3.190427541732788,
"learning_rate": 1.8411414294081003e-06,
"loss": 1.2244315147399902,
"step": 830
},
{
"epoch": 0.7003367003367004,
"grad_norm": 8.976424217224121,
"learning_rate": 1.8401923906714321e-06,
"loss": 0.8990939855575562,
"step": 832
},
{
"epoch": 0.702020202020202,
"grad_norm": 11.49886703491211,
"learning_rate": 1.8392408016605358e-06,
"loss": 0.6986100673675537,
"step": 834
},
{
"epoch": 0.7037037037037037,
"grad_norm": 10.203569412231445,
"learning_rate": 1.8382866656540361e-06,
"loss": 0.8804981708526611,
"step": 836
},
{
"epoch": 0.7053872053872053,
"grad_norm": 6.145118713378906,
"learning_rate": 1.8373299859393326e-06,
"loss": 0.5913242697715759,
"step": 838
},
{
"epoch": 0.7070707070707071,
"grad_norm": 4.84503698348999,
"learning_rate": 1.8363707658125905e-06,
"loss": 1.2492575645446777,
"step": 840
},
{
"epoch": 0.7087542087542088,
"grad_norm": 6.014354228973389,
"learning_rate": 1.8354090085787252e-06,
"loss": 1.122812271118164,
"step": 842
},
{
"epoch": 0.7104377104377104,
"grad_norm": 10.91385269165039,
"learning_rate": 1.8344447175513965e-06,
"loss": 1.0250314474105835,
"step": 844
},
{
"epoch": 0.7121212121212122,
"grad_norm": 5.709978103637695,
"learning_rate": 1.8334778960529916e-06,
"loss": 0.8772053718566895,
"step": 846
},
{
"epoch": 0.7138047138047138,
"grad_norm": 25.334754943847656,
"learning_rate": 1.8325085474146178e-06,
"loss": 0.7974849939346313,
"step": 848
},
{
"epoch": 0.7154882154882155,
"grad_norm": 30.209260940551758,
"learning_rate": 1.8315366749760892e-06,
"loss": 0.9543988704681396,
"step": 850
},
{
"epoch": 0.7171717171717171,
"grad_norm": 3.697704315185547,
"learning_rate": 1.8305622820859153e-06,
"loss": 0.7927026748657227,
"step": 852
},
{
"epoch": 0.7188552188552189,
"grad_norm": 10.00793743133545,
"learning_rate": 1.829585372101289e-06,
"loss": 0.78277987241745,
"step": 854
},
{
"epoch": 0.7205387205387206,
"grad_norm": 7.505032539367676,
"learning_rate": 1.828605948388077e-06,
"loss": 1.1311378479003906,
"step": 856
},
{
"epoch": 0.7222222222222222,
"grad_norm": 4.7181572914123535,
"learning_rate": 1.8276240143208054e-06,
"loss": 0.7503079175949097,
"step": 858
},
{
"epoch": 0.7239057239057239,
"grad_norm": 3.523047924041748,
"learning_rate": 1.8266395732826508e-06,
"loss": 0.9047625064849854,
"step": 860
},
{
"epoch": 0.7255892255892256,
"grad_norm": 2.024121046066284,
"learning_rate": 1.8256526286654264e-06,
"loss": 1.1868062019348145,
"step": 862
},
{
"epoch": 0.7272727272727273,
"grad_norm": 14.294280052185059,
"learning_rate": 1.824663183869572e-06,
"loss": 1.0042986869812012,
"step": 864
},
{
"epoch": 0.7289562289562289,
"grad_norm": 17.085304260253906,
"learning_rate": 1.8236712423041408e-06,
"loss": 0.9877347946166992,
"step": 866
},
{
"epoch": 0.7306397306397306,
"grad_norm": 6.132120609283447,
"learning_rate": 1.822676807386789e-06,
"loss": 1.2511956691741943,
"step": 868
},
{
"epoch": 0.7323232323232324,
"grad_norm": 5.884708881378174,
"learning_rate": 1.8216798825437635e-06,
"loss": 1.1776090860366821,
"step": 870
},
{
"epoch": 0.734006734006734,
"grad_norm": 5.7460737228393555,
"learning_rate": 1.8206804712098903e-06,
"loss": 1.0924787521362305,
"step": 872
},
{
"epoch": 0.7356902356902357,
"grad_norm": 2.724154233932495,
"learning_rate": 1.819678576828561e-06,
"loss": 1.0940457582473755,
"step": 874
},
{
"epoch": 0.7373737373737373,
"grad_norm": 21.470823287963867,
"learning_rate": 1.8186742028517237e-06,
"loss": 0.8332981467247009,
"step": 876
},
{
"epoch": 0.7390572390572391,
"grad_norm": 7.482705116271973,
"learning_rate": 1.8176673527398694e-06,
"loss": 0.6369479894638062,
"step": 878
},
{
"epoch": 0.7407407407407407,
"grad_norm": 15.344402313232422,
"learning_rate": 1.8166580299620202e-06,
"loss": 0.612411618232727,
"step": 880
},
{
"epoch": 0.7424242424242424,
"grad_norm": 2.4508793354034424,
"learning_rate": 1.815646237995718e-06,
"loss": 1.1662663221359253,
"step": 882
},
{
"epoch": 0.7441077441077442,
"grad_norm": 3.4642128944396973,
"learning_rate": 1.814631980327012e-06,
"loss": 1.1108534336090088,
"step": 884
},
{
"epoch": 0.7457912457912458,
"grad_norm": 2.681384801864624,
"learning_rate": 1.813615260450446e-06,
"loss": 0.6596791744232178,
"step": 886
},
{
"epoch": 0.7474747474747475,
"grad_norm": 1.7828519344329834,
"learning_rate": 1.8125960818690485e-06,
"loss": 1.0084741115570068,
"step": 888
},
{
"epoch": 0.7491582491582491,
"grad_norm": 34.723270416259766,
"learning_rate": 1.811574448094318e-06,
"loss": 0.9112769961357117,
"step": 890
},
{
"epoch": 0.7508417508417509,
"grad_norm": 10.580464363098145,
"learning_rate": 1.8105503626462129e-06,
"loss": 0.9600024819374084,
"step": 892
},
{
"epoch": 0.7525252525252525,
"grad_norm": 17.393407821655273,
"learning_rate": 1.8095238290531385e-06,
"loss": 0.7573001384735107,
"step": 894
},
{
"epoch": 0.7542087542087542,
"grad_norm": 8.820290565490723,
"learning_rate": 1.8084948508519346e-06,
"loss": 0.8571316003799438,
"step": 896
},
{
"epoch": 0.7558922558922558,
"grad_norm": 15.848811149597168,
"learning_rate": 1.8074634315878644e-06,
"loss": 0.6229598522186279,
"step": 898
},
{
"epoch": 0.7575757575757576,
"grad_norm": 5.893372058868408,
"learning_rate": 1.8064295748146014e-06,
"loss": 0.8924508094787598,
"step": 900
},
{
"epoch": 0.7592592592592593,
"grad_norm": 21.465091705322266,
"learning_rate": 1.8053932840942175e-06,
"loss": 0.6515762209892273,
"step": 902
},
{
"epoch": 0.7609427609427609,
"grad_norm": 3.3033552169799805,
"learning_rate": 1.8043545629971689e-06,
"loss": 1.2100439071655273,
"step": 904
},
{
"epoch": 0.7626262626262627,
"grad_norm": 3.6212236881256104,
"learning_rate": 1.8033134151022881e-06,
"loss": 0.9367895126342773,
"step": 906
},
{
"epoch": 0.7643097643097643,
"grad_norm": 11.270123481750488,
"learning_rate": 1.8022698439967673e-06,
"loss": 0.9181069731712341,
"step": 908
},
{
"epoch": 0.765993265993266,
"grad_norm": 4.863030433654785,
"learning_rate": 1.8012238532761476e-06,
"loss": 0.8502522110939026,
"step": 910
},
{
"epoch": 0.7676767676767676,
"grad_norm": 7.718131065368652,
"learning_rate": 1.8001754465443078e-06,
"loss": 0.9918288588523865,
"step": 912
},
{
"epoch": 0.7693602693602694,
"grad_norm": 10.74516773223877,
"learning_rate": 1.79912462741345e-06,
"loss": 0.8540866374969482,
"step": 914
},
{
"epoch": 0.7710437710437711,
"grad_norm": 6.144227027893066,
"learning_rate": 1.798071399504088e-06,
"loss": 0.9551119804382324,
"step": 916
},
{
"epoch": 0.7727272727272727,
"grad_norm": 3.8601930141448975,
"learning_rate": 1.7970157664450357e-06,
"loss": 0.6338967084884644,
"step": 918
},
{
"epoch": 0.7744107744107744,
"grad_norm": 11.050410270690918,
"learning_rate": 1.7959577318733925e-06,
"loss": 0.5116314888000488,
"step": 920
},
{
"epoch": 0.7760942760942761,
"grad_norm": 4.513789176940918,
"learning_rate": 1.7948972994345328e-06,
"loss": 0.6171036958694458,
"step": 922
},
{
"epoch": 0.7777777777777778,
"grad_norm": 8.82806396484375,
"learning_rate": 1.7938344727820928e-06,
"loss": 0.9206382632255554,
"step": 924
},
{
"epoch": 0.7794612794612794,
"grad_norm": 4.373292446136475,
"learning_rate": 1.7927692555779577e-06,
"loss": 1.1664514541625977,
"step": 926
},
{
"epoch": 0.7811447811447811,
"grad_norm": 3.1802244186401367,
"learning_rate": 1.791701651492248e-06,
"loss": 0.48759081959724426,
"step": 928
},
{
"epoch": 0.7828282828282829,
"grad_norm": 6.313639163970947,
"learning_rate": 1.7906316642033099e-06,
"loss": 1.3327703475952148,
"step": 930
},
{
"epoch": 0.7845117845117845,
"grad_norm": 22.747098922729492,
"learning_rate": 1.7895592973976998e-06,
"loss": 0.8829092383384705,
"step": 932
},
{
"epoch": 0.7861952861952862,
"grad_norm": 3.2088170051574707,
"learning_rate": 1.7884845547701721e-06,
"loss": 1.0014090538024902,
"step": 934
},
{
"epoch": 0.7878787878787878,
"grad_norm": 12.781431198120117,
"learning_rate": 1.7874074400236677e-06,
"loss": 0.8620262145996094,
"step": 936
},
{
"epoch": 0.7895622895622896,
"grad_norm": 2.6499383449554443,
"learning_rate": 1.7863279568692999e-06,
"loss": 0.8909909725189209,
"step": 938
},
{
"epoch": 0.7912457912457912,
"grad_norm": 2.3473894596099854,
"learning_rate": 1.7852461090263422e-06,
"loss": 1.0048516988754272,
"step": 940
},
{
"epoch": 0.7929292929292929,
"grad_norm": 16.40445327758789,
"learning_rate": 1.7841619002222164e-06,
"loss": 0.3737819790840149,
"step": 942
},
{
"epoch": 0.7946127946127947,
"grad_norm": 3.327476978302002,
"learning_rate": 1.7830753341924768e-06,
"loss": 0.9010682106018066,
"step": 944
},
{
"epoch": 0.7962962962962963,
"grad_norm": 2.6396255493164062,
"learning_rate": 1.781986414680802e-06,
"loss": 0.925070583820343,
"step": 946
},
{
"epoch": 0.797979797979798,
"grad_norm": 3.3719475269317627,
"learning_rate": 1.7808951454389761e-06,
"loss": 1.036871075630188,
"step": 948
},
{
"epoch": 0.7996632996632996,
"grad_norm": 5.47444486618042,
"learning_rate": 1.7798015302268826e-06,
"loss": 0.8623565435409546,
"step": 950
},
{
"epoch": 0.8013468013468014,
"grad_norm": 11.89119815826416,
"learning_rate": 1.7787055728124853e-06,
"loss": 0.4426053762435913,
"step": 952
},
{
"epoch": 0.803030303030303,
"grad_norm": 3.086700916290283,
"learning_rate": 1.777607276971818e-06,
"loss": 0.9516481161117554,
"step": 954
},
{
"epoch": 0.8047138047138047,
"grad_norm": 11.045938491821289,
"learning_rate": 1.7765066464889729e-06,
"loss": 0.9658932685852051,
"step": 956
},
{
"epoch": 0.8063973063973064,
"grad_norm": 10.93420696258545,
"learning_rate": 1.775403685156085e-06,
"loss": 1.1045958995819092,
"step": 958
},
{
"epoch": 0.8080808080808081,
"grad_norm": 2.5317461490631104,
"learning_rate": 1.77429839677332e-06,
"loss": 0.673387348651886,
"step": 960
},
{
"epoch": 0.8097643097643098,
"grad_norm": 4.62790584564209,
"learning_rate": 1.773190785148861e-06,
"loss": 0.771082878112793,
"step": 962
},
{
"epoch": 0.8114478114478114,
"grad_norm": 6.418295860290527,
"learning_rate": 1.7720808540988965e-06,
"loss": 0.6905859112739563,
"step": 964
},
{
"epoch": 0.8131313131313131,
"grad_norm": 2.9778709411621094,
"learning_rate": 1.770968607447606e-06,
"loss": 0.9952410459518433,
"step": 966
},
{
"epoch": 0.8148148148148148,
"grad_norm": 17.664697647094727,
"learning_rate": 1.7698540490271475e-06,
"loss": 1.1883214712142944,
"step": 968
},
{
"epoch": 0.8164983164983165,
"grad_norm": 3.8164806365966797,
"learning_rate": 1.7687371826776432e-06,
"loss": 0.9806801080703735,
"step": 970
},
{
"epoch": 0.8181818181818182,
"grad_norm": 10.780609130859375,
"learning_rate": 1.7676180122471677e-06,
"loss": 0.9630722403526306,
"step": 972
},
{
"epoch": 0.8198653198653199,
"grad_norm": 6.188197612762451,
"learning_rate": 1.7664965415917342e-06,
"loss": 0.7298092842102051,
"step": 974
},
{
"epoch": 0.8215488215488216,
"grad_norm": 4.687350749969482,
"learning_rate": 1.765372774575281e-06,
"loss": 0.9373712539672852,
"step": 976
},
{
"epoch": 0.8232323232323232,
"grad_norm": 5.430413722991943,
"learning_rate": 1.764246715069658e-06,
"loss": 1.1954350471496582,
"step": 978
},
{
"epoch": 0.8249158249158249,
"grad_norm": 3.7986605167388916,
"learning_rate": 1.7631183669546146e-06,
"loss": 1.161393404006958,
"step": 980
},
{
"epoch": 0.8265993265993266,
"grad_norm": 4.60081672668457,
"learning_rate": 1.761987734117784e-06,
"loss": 1.046337366104126,
"step": 982
},
{
"epoch": 0.8282828282828283,
"grad_norm": 3.7046844959259033,
"learning_rate": 1.7608548204546724e-06,
"loss": 1.0424065589904785,
"step": 984
},
{
"epoch": 0.82996632996633,
"grad_norm": 19.03668212890625,
"learning_rate": 1.7597196298686446e-06,
"loss": 0.9536873698234558,
"step": 986
},
{
"epoch": 0.8316498316498316,
"grad_norm": 32.48857498168945,
"learning_rate": 1.7585821662709088e-06,
"loss": 0.8443811535835266,
"step": 988
},
{
"epoch": 0.8333333333333334,
"grad_norm": 11.665223121643066,
"learning_rate": 1.7574424335805066e-06,
"loss": 0.8324294686317444,
"step": 990
},
{
"epoch": 0.835016835016835,
"grad_norm": 21.848285675048828,
"learning_rate": 1.7563004357242962e-06,
"loss": 0.6908457279205322,
"step": 992
},
{
"epoch": 0.8367003367003367,
"grad_norm": 2.1612720489501953,
"learning_rate": 1.755156176636941e-06,
"loss": 0.9239605069160461,
"step": 994
},
{
"epoch": 0.8383838383838383,
"grad_norm": 4.865361213684082,
"learning_rate": 1.7540096602608946e-06,
"loss": 0.6591212153434753,
"step": 996
},
{
"epoch": 0.8400673400673401,
"grad_norm": 3.861494779586792,
"learning_rate": 1.7528608905463881e-06,
"loss": 0.9056419134140015,
"step": 998
},
{
"epoch": 0.8417508417508418,
"grad_norm": 2.9562947750091553,
"learning_rate": 1.7517098714514175e-06,
"loss": 1.0812749862670898,
"step": 1000
},
{
"epoch": 0.8434343434343434,
"grad_norm": 3.0346264839172363,
"learning_rate": 1.7505566069417272e-06,
"loss": 0.7617006301879883,
"step": 1002
},
{
"epoch": 0.8451178451178452,
"grad_norm": 3.785036325454712,
"learning_rate": 1.749401100990799e-06,
"loss": 0.6745568513870239,
"step": 1004
},
{
"epoch": 0.8468013468013468,
"grad_norm": 5.557058334350586,
"learning_rate": 1.748243357579837e-06,
"loss": 1.0811188220977783,
"step": 1006
},
{
"epoch": 0.8484848484848485,
"grad_norm": 1.9689534902572632,
"learning_rate": 1.747083380697754e-06,
"loss": 0.5900795459747314,
"step": 1008
},
{
"epoch": 0.8501683501683501,
"grad_norm": 14.491848945617676,
"learning_rate": 1.7459211743411589e-06,
"loss": 0.9504165649414062,
"step": 1010
},
{
"epoch": 0.8518518518518519,
"grad_norm": 21.8311767578125,
"learning_rate": 1.7447567425143413e-06,
"loss": 0.8922120928764343,
"step": 1012
},
{
"epoch": 0.8535353535353535,
"grad_norm": 13.790666580200195,
"learning_rate": 1.7435900892292593e-06,
"loss": 0.7710224390029907,
"step": 1014
},
{
"epoch": 0.8552188552188552,
"grad_norm": 20.326784133911133,
"learning_rate": 1.7424212185055236e-06,
"loss": 0.6666241884231567,
"step": 1016
},
{
"epoch": 0.8569023569023569,
"grad_norm": 18.170595169067383,
"learning_rate": 1.7412501343703858e-06,
"loss": 0.967223048210144,
"step": 1018
},
{
"epoch": 0.8585858585858586,
"grad_norm": 3.054368257522583,
"learning_rate": 1.740076840858724e-06,
"loss": 1.2456423044204712,
"step": 1020
},
{
"epoch": 0.8602693602693603,
"grad_norm": 26.2432861328125,
"learning_rate": 1.7389013420130278e-06,
"loss": 0.9183678030967712,
"step": 1022
},
{
"epoch": 0.8619528619528619,
"grad_norm": 4.530948162078857,
"learning_rate": 1.7377236418833855e-06,
"loss": 0.953632652759552,
"step": 1024
},
{
"epoch": 0.8636363636363636,
"grad_norm": 4.451155185699463,
"learning_rate": 1.736543744527469e-06,
"loss": 0.8909140825271606,
"step": 1026
},
{
"epoch": 0.8653198653198653,
"grad_norm": 3.3854105472564697,
"learning_rate": 1.7353616540105214e-06,
"loss": 0.9759948253631592,
"step": 1028
},
{
"epoch": 0.867003367003367,
"grad_norm": 7.278261184692383,
"learning_rate": 1.7341773744053423e-06,
"loss": 0.643425703048706,
"step": 1030
},
{
"epoch": 0.8686868686868687,
"grad_norm": 3.562976360321045,
"learning_rate": 1.7329909097922726e-06,
"loss": 0.8528425693511963,
"step": 1032
},
{
"epoch": 0.8703703703703703,
"grad_norm": 4.631925106048584,
"learning_rate": 1.7318022642591826e-06,
"loss": 0.9317729473114014,
"step": 1034
},
{
"epoch": 0.8720538720538721,
"grad_norm": 2.9623520374298096,
"learning_rate": 1.730611441901456e-06,
"loss": 0.9544110298156738,
"step": 1036
},
{
"epoch": 0.8737373737373737,
"grad_norm": 7.970090389251709,
"learning_rate": 1.7294184468219768e-06,
"loss": 1.1069408655166626,
"step": 1038
},
{
"epoch": 0.8754208754208754,
"grad_norm": 5.28152322769165,
"learning_rate": 1.728223283131116e-06,
"loss": 1.0873464345932007,
"step": 1040
},
{
"epoch": 0.877104377104377,
"grad_norm": 5.224731922149658,
"learning_rate": 1.727025954946714e-06,
"loss": 0.9729514718055725,
"step": 1042
},
{
"epoch": 0.8787878787878788,
"grad_norm": 13.218440055847168,
"learning_rate": 1.7258264663940706e-06,
"loss": 1.0898833274841309,
"step": 1044
},
{
"epoch": 0.8804713804713805,
"grad_norm": 2.7989261150360107,
"learning_rate": 1.724624821605929e-06,
"loss": 1.0561833381652832,
"step": 1046
},
{
"epoch": 0.8821548821548821,
"grad_norm": 13.938822746276855,
"learning_rate": 1.7234210247224608e-06,
"loss": 0.9620407223701477,
"step": 1048
},
{
"epoch": 0.8838383838383839,
"grad_norm": 14.411212921142578,
"learning_rate": 1.7222150798912527e-06,
"loss": 0.7809741497039795,
"step": 1050
},
{
"epoch": 0.8855218855218855,
"grad_norm": 6.374806880950928,
"learning_rate": 1.7210069912672924e-06,
"loss": 1.0467114448547363,
"step": 1052
},
{
"epoch": 0.8872053872053872,
"grad_norm": 9.24715805053711,
"learning_rate": 1.7197967630129533e-06,
"loss": 0.5621042251586914,
"step": 1054
},
{
"epoch": 0.8888888888888888,
"grad_norm": 3.0764286518096924,
"learning_rate": 1.7185843992979805e-06,
"loss": 0.9588031768798828,
"step": 1056
},
{
"epoch": 0.8905723905723906,
"grad_norm": 3.0444071292877197,
"learning_rate": 1.7173699042994778e-06,
"loss": 0.9131466150283813,
"step": 1058
},
{
"epoch": 0.8922558922558923,
"grad_norm": 7.547487735748291,
"learning_rate": 1.716153282201891e-06,
"loss": 0.9909827709197998,
"step": 1060
},
{
"epoch": 0.8939393939393939,
"grad_norm": 3.859555959701538,
"learning_rate": 1.7149345371969958e-06,
"loss": 0.8949623107910156,
"step": 1062
},
{
"epoch": 0.8956228956228957,
"grad_norm": 10.671557426452637,
"learning_rate": 1.7137136734838809e-06,
"loss": 0.8130732774734497,
"step": 1064
},
{
"epoch": 0.8973063973063973,
"grad_norm": 2.6384527683258057,
"learning_rate": 1.7124906952689354e-06,
"loss": 1.0677348375320435,
"step": 1066
},
{
"epoch": 0.898989898989899,
"grad_norm": 2.3599157333374023,
"learning_rate": 1.7112656067658345e-06,
"loss": 0.8169218301773071,
"step": 1068
},
{
"epoch": 0.9006734006734006,
"grad_norm": 6.580990314483643,
"learning_rate": 1.7100384121955229e-06,
"loss": 0.9567373991012573,
"step": 1070
},
{
"epoch": 0.9023569023569024,
"grad_norm": 2.7122886180877686,
"learning_rate": 1.7088091157862026e-06,
"loss": 1.2019579410552979,
"step": 1072
},
{
"epoch": 0.9040404040404041,
"grad_norm": 2.5349674224853516,
"learning_rate": 1.7075777217733169e-06,
"loss": 0.8406597971916199,
"step": 1074
},
{
"epoch": 0.9057239057239057,
"grad_norm": 6.190466403961182,
"learning_rate": 1.7063442343995361e-06,
"loss": 0.4906361401081085,
"step": 1076
},
{
"epoch": 0.9074074074074074,
"grad_norm": 26.555025100708008,
"learning_rate": 1.7051086579147436e-06,
"loss": 1.0886037349700928,
"step": 1078
},
{
"epoch": 0.9090909090909091,
"grad_norm": 3.0735490322113037,
"learning_rate": 1.7038709965760198e-06,
"loss": 0.9269078969955444,
"step": 1080
},
{
"epoch": 0.9107744107744108,
"grad_norm": 2.295616865158081,
"learning_rate": 1.7026312546476292e-06,
"loss": 0.9460815191268921,
"step": 1082
},
{
"epoch": 0.9124579124579124,
"grad_norm": 14.62086009979248,
"learning_rate": 1.701389436401004e-06,
"loss": 0.7059042453765869,
"step": 1084
},
{
"epoch": 0.9141414141414141,
"grad_norm": 4.020232200622559,
"learning_rate": 1.700145546114731e-06,
"loss": 1.15854811668396,
"step": 1086
},
{
"epoch": 0.9158249158249159,
"grad_norm": 4.303004264831543,
"learning_rate": 1.698899588074535e-06,
"loss": 0.9253766536712646,
"step": 1088
},
{
"epoch": 0.9175084175084175,
"grad_norm": 2.722356081008911,
"learning_rate": 1.6976515665732663e-06,
"loss": 0.9150590896606445,
"step": 1090
},
{
"epoch": 0.9191919191919192,
"grad_norm": 8.33704948425293,
"learning_rate": 1.6964014859108837e-06,
"loss": 1.0268497467041016,
"step": 1092
},
{
"epoch": 0.9208754208754208,
"grad_norm": 4.683021068572998,
"learning_rate": 1.6951493503944414e-06,
"loss": 0.9068109393119812,
"step": 1094
},
{
"epoch": 0.9225589225589226,
"grad_norm": 10.631436347961426,
"learning_rate": 1.693895164338073e-06,
"loss": 0.7467716932296753,
"step": 1096
},
{
"epoch": 0.9242424242424242,
"grad_norm": 8.113303184509277,
"learning_rate": 1.6926389320629768e-06,
"loss": 0.384426474571228,
"step": 1098
},
{
"epoch": 0.9259259259259259,
"grad_norm": 5.846349239349365,
"learning_rate": 1.6913806578974016e-06,
"loss": 0.9705697298049927,
"step": 1100
},
{
"epoch": 0.9276094276094277,
"grad_norm": 23.626840591430664,
"learning_rate": 1.690120346176632e-06,
"loss": 0.5436959266662598,
"step": 1102
},
{
"epoch": 0.9292929292929293,
"grad_norm": 4.793126106262207,
"learning_rate": 1.6888580012429717e-06,
"loss": 1.117484450340271,
"step": 1104
},
{
"epoch": 0.930976430976431,
"grad_norm": 10.387064933776855,
"learning_rate": 1.68759362744573e-06,
"loss": 1.031156301498413,
"step": 1106
},
{
"epoch": 0.9326599326599326,
"grad_norm": 14.877448081970215,
"learning_rate": 1.686327229141207e-06,
"loss": 0.8722270131111145,
"step": 1108
},
{
"epoch": 0.9343434343434344,
"grad_norm": 3.464400053024292,
"learning_rate": 1.6850588106926773e-06,
"loss": 1.2158129215240479,
"step": 1110
},
{
"epoch": 0.936026936026936,
"grad_norm": 4.9829421043396,
"learning_rate": 1.6837883764703765e-06,
"loss": 1.1986503601074219,
"step": 1112
},
{
"epoch": 0.9377104377104377,
"grad_norm": 3.5053603649139404,
"learning_rate": 1.6825159308514847e-06,
"loss": 1.0430546998977661,
"step": 1114
},
{
"epoch": 0.9393939393939394,
"grad_norm": 6.993835926055908,
"learning_rate": 1.6812414782201127e-06,
"loss": 1.1407470703125,
"step": 1116
},
{
"epoch": 0.9410774410774411,
"grad_norm": 6.774454116821289,
"learning_rate": 1.6799650229672862e-06,
"loss": 1.0087709426879883,
"step": 1118
},
{
"epoch": 0.9427609427609428,
"grad_norm": 3.8694427013397217,
"learning_rate": 1.6786865694909301e-06,
"loss": 1.2728749513626099,
"step": 1120
},
{
"epoch": 0.9444444444444444,
"grad_norm": 8.199234962463379,
"learning_rate": 1.6774061221958552e-06,
"loss": 0.7386917471885681,
"step": 1122
},
{
"epoch": 0.9461279461279462,
"grad_norm": 3.474858283996582,
"learning_rate": 1.6761236854937406e-06,
"loss": 0.8540256023406982,
"step": 1124
},
{
"epoch": 0.9478114478114478,
"grad_norm": 5.611124038696289,
"learning_rate": 1.674839263803121e-06,
"loss": 0.849441409111023,
"step": 1126
},
{
"epoch": 0.9494949494949495,
"grad_norm": 3.0861027240753174,
"learning_rate": 1.6735528615493686e-06,
"loss": 0.9585309028625488,
"step": 1128
},
{
"epoch": 0.9511784511784511,
"grad_norm": 20.665544509887695,
"learning_rate": 1.6722644831646815e-06,
"loss": 0.9195750951766968,
"step": 1130
},
{
"epoch": 0.9528619528619529,
"grad_norm": 2.3980801105499268,
"learning_rate": 1.6709741330880644e-06,
"loss": 0.9300163984298706,
"step": 1132
},
{
"epoch": 0.9545454545454546,
"grad_norm": 11.30346393585205,
"learning_rate": 1.6696818157653172e-06,
"loss": 0.9436147212982178,
"step": 1134
},
{
"epoch": 0.9562289562289562,
"grad_norm": 15.200255393981934,
"learning_rate": 1.6683875356490157e-06,
"loss": 0.83840012550354,
"step": 1136
},
{
"epoch": 0.9579124579124579,
"grad_norm": 11.014248847961426,
"learning_rate": 1.6670912971985002e-06,
"loss": 0.7340762615203857,
"step": 1138
},
{
"epoch": 0.9595959595959596,
"grad_norm": 3.3604698181152344,
"learning_rate": 1.6657931048798576e-06,
"loss": 0.5434874296188354,
"step": 1140
},
{
"epoch": 0.9612794612794613,
"grad_norm": 8.75454330444336,
"learning_rate": 1.6644929631659061e-06,
"loss": 0.8939019441604614,
"step": 1142
},
{
"epoch": 0.9629629629629629,
"grad_norm": 14.948843955993652,
"learning_rate": 1.6631908765361818e-06,
"loss": 0.6150766611099243,
"step": 1144
},
{
"epoch": 0.9646464646464646,
"grad_norm": 2.9250028133392334,
"learning_rate": 1.6618868494769202e-06,
"loss": 0.8925027847290039,
"step": 1146
},
{
"epoch": 0.9663299663299664,
"grad_norm": 10.11111831665039,
"learning_rate": 1.6605808864810437e-06,
"loss": 0.7491191029548645,
"step": 1148
},
{
"epoch": 0.968013468013468,
"grad_norm": 8.039884567260742,
"learning_rate": 1.6592729920481443e-06,
"loss": 0.9510982036590576,
"step": 1150
},
{
"epoch": 0.9696969696969697,
"grad_norm": 11.84205150604248,
"learning_rate": 1.6579631706844683e-06,
"loss": 0.6039742231369019,
"step": 1152
},
{
"epoch": 0.9713804713804713,
"grad_norm": 26.592609405517578,
"learning_rate": 1.6566514269029015e-06,
"loss": 0.9072830677032471,
"step": 1154
},
{
"epoch": 0.9730639730639731,
"grad_norm": 4.943899154663086,
"learning_rate": 1.6553377652229536e-06,
"loss": 0.5825839042663574,
"step": 1156
},
{
"epoch": 0.9747474747474747,
"grad_norm": 5.413260459899902,
"learning_rate": 1.6540221901707413e-06,
"loss": 0.9307392835617065,
"step": 1158
},
{
"epoch": 0.9764309764309764,
"grad_norm": 6.360762119293213,
"learning_rate": 1.6527047062789743e-06,
"loss": 0.4215626120567322,
"step": 1160
},
{
"epoch": 0.9781144781144782,
"grad_norm": 9.286370277404785,
"learning_rate": 1.6513853180869391e-06,
"loss": 1.088386058807373,
"step": 1162
},
{
"epoch": 0.9797979797979798,
"grad_norm": 6.5988993644714355,
"learning_rate": 1.6500640301404832e-06,
"loss": 0.6811473965644836,
"step": 1164
},
{
"epoch": 0.9814814814814815,
"grad_norm": 9.595373153686523,
"learning_rate": 1.6487408469919992e-06,
"loss": 0.7789331674575806,
"step": 1166
},
{
"epoch": 0.9831649831649831,
"grad_norm": 5.964288234710693,
"learning_rate": 1.6474157732004101e-06,
"loss": 0.8091530203819275,
"step": 1168
},
{
"epoch": 0.9848484848484849,
"grad_norm": 11.993547439575195,
"learning_rate": 1.6460888133311526e-06,
"loss": 0.832628607749939,
"step": 1170
},
{
"epoch": 0.9865319865319865,
"grad_norm": 3.2034716606140137,
"learning_rate": 1.6447599719561616e-06,
"loss": 0.612036406993866,
"step": 1172
},
{
"epoch": 0.9882154882154882,
"grad_norm": 5.53648567199707,
"learning_rate": 1.6434292536538547e-06,
"loss": 0.9042845964431763,
"step": 1174
},
{
"epoch": 0.98989898989899,
"grad_norm": 5.690428733825684,
"learning_rate": 1.6420966630091168e-06,
"loss": 0.44773343205451965,
"step": 1176
},
{
"epoch": 0.9915824915824916,
"grad_norm": 11.099560737609863,
"learning_rate": 1.6407622046132831e-06,
"loss": 1.0306243896484375,
"step": 1178
},
{
"epoch": 0.9932659932659933,
"grad_norm": 11.031452178955078,
"learning_rate": 1.6394258830641243e-06,
"loss": 0.42686060070991516,
"step": 1180
},
{
"epoch": 0.9949494949494949,
"grad_norm": 2.295154094696045,
"learning_rate": 1.6380877029658303e-06,
"loss": 0.8935648202896118,
"step": 1182
},
{
"epoch": 0.9966329966329966,
"grad_norm": 5.188049793243408,
"learning_rate": 1.6367476689289947e-06,
"loss": 1.000899076461792,
"step": 1184
},
{
"epoch": 0.9983164983164983,
"grad_norm": 5.049581527709961,
"learning_rate": 1.6354057855705984e-06,
"loss": 0.6279634833335876,
"step": 1186
},
{
"epoch": 1.0,
"grad_norm": 15.246573448181152,
"learning_rate": 1.6340620575139947e-06,
"loss": 0.6900116205215454,
"step": 1188
},
{
"epoch": 1.0016835016835017,
"grad_norm": 5.413362503051758,
"learning_rate": 1.6327164893888913e-06,
"loss": 0.39591357111930847,
"step": 1190
},
{
"epoch": 1.0033670033670035,
"grad_norm": 7.250094890594482,
"learning_rate": 1.6313690858313374e-06,
"loss": 0.41023939847946167,
"step": 1192
},
{
"epoch": 1.005050505050505,
"grad_norm": 4.482004642486572,
"learning_rate": 1.6300198514837045e-06,
"loss": 1.090850591659546,
"step": 1194
},
{
"epoch": 1.0067340067340067,
"grad_norm": 15.401289939880371,
"learning_rate": 1.6286687909946732e-06,
"loss": 0.8496726751327515,
"step": 1196
},
{
"epoch": 1.0084175084175084,
"grad_norm": 2.563889741897583,
"learning_rate": 1.6273159090192152e-06,
"loss": 0.9915731549263,
"step": 1198
},
{
"epoch": 1.0101010101010102,
"grad_norm": 8.505236625671387,
"learning_rate": 1.6259612102185778e-06,
"loss": 1.0761607885360718,
"step": 1200
},
{
"epoch": 1.0117845117845117,
"grad_norm": 2.467069625854492,
"learning_rate": 1.6246046992602685e-06,
"loss": 0.9234099984169006,
"step": 1202
},
{
"epoch": 1.0134680134680134,
"grad_norm": 2.2489092350006104,
"learning_rate": 1.6232463808180385e-06,
"loss": 0.9091596007347107,
"step": 1204
},
{
"epoch": 1.0151515151515151,
"grad_norm": 24.074737548828125,
"learning_rate": 1.6218862595718664e-06,
"loss": 1.0585005283355713,
"step": 1206
},
{
"epoch": 1.0168350168350169,
"grad_norm": 11.167364120483398,
"learning_rate": 1.620524340207942e-06,
"loss": 0.6014789938926697,
"step": 1208
},
{
"epoch": 1.0185185185185186,
"grad_norm": 9.423373222351074,
"learning_rate": 1.6191606274186504e-06,
"loss": 0.5883907079696655,
"step": 1210
},
{
"epoch": 1.02020202020202,
"grad_norm": 4.673365592956543,
"learning_rate": 1.6177951259025562e-06,
"loss": 0.5414766669273376,
"step": 1212
},
{
"epoch": 1.0218855218855218,
"grad_norm": 8.354643821716309,
"learning_rate": 1.6164278403643867e-06,
"loss": 0.7363089919090271,
"step": 1214
},
{
"epoch": 1.0235690235690236,
"grad_norm": 6.500521183013916,
"learning_rate": 1.6150587755150158e-06,
"loss": 0.38967499136924744,
"step": 1216
},
{
"epoch": 1.0252525252525253,
"grad_norm": 24.9106388092041,
"learning_rate": 1.6136879360714478e-06,
"loss": 0.9002467393875122,
"step": 1218
},
{
"epoch": 1.026936026936027,
"grad_norm": 3.819883346557617,
"learning_rate": 1.612315326756802e-06,
"loss": 0.7683883905410767,
"step": 1220
},
{
"epoch": 1.0286195286195285,
"grad_norm": 34.932952880859375,
"learning_rate": 1.6109409523002942e-06,
"loss": 0.9174226522445679,
"step": 1222
},
{
"epoch": 1.0303030303030303,
"grad_norm": 2.4514238834381104,
"learning_rate": 1.6095648174372231e-06,
"loss": 1.0709283351898193,
"step": 1224
},
{
"epoch": 1.031986531986532,
"grad_norm": 4.087513446807861,
"learning_rate": 1.6081869269089522e-06,
"loss": 0.6256165504455566,
"step": 1226
},
{
"epoch": 1.0336700336700337,
"grad_norm": 3.7036447525024414,
"learning_rate": 1.606807285462894e-06,
"loss": 0.8476806282997131,
"step": 1228
},
{
"epoch": 1.0353535353535352,
"grad_norm": 2.504366397857666,
"learning_rate": 1.6054258978524943e-06,
"loss": 0.8022794127464294,
"step": 1230
},
{
"epoch": 1.037037037037037,
"grad_norm": 11.632919311523438,
"learning_rate": 1.6040427688372143e-06,
"loss": 0.4790239632129669,
"step": 1232
},
{
"epoch": 1.0387205387205387,
"grad_norm": 1.2272193431854248,
"learning_rate": 1.602657903182515e-06,
"loss": 0.7812309265136719,
"step": 1234
},
{
"epoch": 1.0404040404040404,
"grad_norm": 1.8513426780700684,
"learning_rate": 1.6012713056598423e-06,
"loss": 0.7921426892280579,
"step": 1236
},
{
"epoch": 1.0420875420875422,
"grad_norm": 4.828263282775879,
"learning_rate": 1.599882981046607e-06,
"loss": 0.5412895679473877,
"step": 1238
},
{
"epoch": 1.0437710437710437,
"grad_norm": 2.7645084857940674,
"learning_rate": 1.5984929341261724e-06,
"loss": 0.9840224981307983,
"step": 1240
},
{
"epoch": 1.0454545454545454,
"grad_norm": 3.864872455596924,
"learning_rate": 1.5971011696878342e-06,
"loss": 0.9463930130004883,
"step": 1242
},
{
"epoch": 1.0471380471380471,
"grad_norm": 4.084227561950684,
"learning_rate": 1.5957076925268072e-06,
"loss": 0.639992356300354,
"step": 1244
},
{
"epoch": 1.0488215488215489,
"grad_norm": 3.3840675354003906,
"learning_rate": 1.5943125074442064e-06,
"loss": 0.6726884841918945,
"step": 1246
},
{
"epoch": 1.0505050505050506,
"grad_norm": 2.852729558944702,
"learning_rate": 1.5929156192470313e-06,
"loss": 0.9147169589996338,
"step": 1248
},
{
"epoch": 1.0521885521885521,
"grad_norm": 4.347400665283203,
"learning_rate": 1.5915170327481491e-06,
"loss": 0.7575803995132446,
"step": 1250
},
{
"epoch": 1.0538720538720538,
"grad_norm": 12.422771453857422,
"learning_rate": 1.5901167527662796e-06,
"loss": 0.6838544607162476,
"step": 1252
},
{
"epoch": 1.0555555555555556,
"grad_norm": 11.088696479797363,
"learning_rate": 1.5887147841259758e-06,
"loss": 0.9683138728141785,
"step": 1254
},
{
"epoch": 1.0572390572390573,
"grad_norm": 5.527649879455566,
"learning_rate": 1.5873111316576102e-06,
"loss": 0.7508020401000977,
"step": 1256
},
{
"epoch": 1.0589225589225588,
"grad_norm": 4.718619346618652,
"learning_rate": 1.5859058001973555e-06,
"loss": 0.5126559734344482,
"step": 1258
},
{
"epoch": 1.0606060606060606,
"grad_norm": 5.101532459259033,
"learning_rate": 1.5844987945871701e-06,
"loss": 0.77130526304245,
"step": 1260
},
{
"epoch": 1.0622895622895623,
"grad_norm": 5.325422763824463,
"learning_rate": 1.5830901196747805e-06,
"loss": 0.6283507347106934,
"step": 1262
},
{
"epoch": 1.063973063973064,
"grad_norm": 15.08485221862793,
"learning_rate": 1.5816797803136647e-06,
"loss": 0.7283768653869629,
"step": 1264
},
{
"epoch": 1.0656565656565657,
"grad_norm": 3.9415273666381836,
"learning_rate": 1.5802677813630348e-06,
"loss": 0.6957473754882812,
"step": 1266
},
{
"epoch": 1.0673400673400673,
"grad_norm": 10.470375061035156,
"learning_rate": 1.5788541276878212e-06,
"loss": 0.6225847005844116,
"step": 1268
},
{
"epoch": 1.069023569023569,
"grad_norm": 13.44847583770752,
"learning_rate": 1.577438824158656e-06,
"loss": 0.6269044280052185,
"step": 1270
},
{
"epoch": 1.0707070707070707,
"grad_norm": 1.2674486637115479,
"learning_rate": 1.5760218756518548e-06,
"loss": 0.6266012191772461,
"step": 1272
},
{
"epoch": 1.0723905723905724,
"grad_norm": 25.154924392700195,
"learning_rate": 1.5746032870494022e-06,
"loss": 0.4940655827522278,
"step": 1274
},
{
"epoch": 1.074074074074074,
"grad_norm": 5.607649326324463,
"learning_rate": 1.5731830632389322e-06,
"loss": 0.6989841461181641,
"step": 1276
},
{
"epoch": 1.0757575757575757,
"grad_norm": 111.35026550292969,
"learning_rate": 1.5717612091137137e-06,
"loss": 0.9674046039581299,
"step": 1278
},
{
"epoch": 1.0774410774410774,
"grad_norm": 36.46900939941406,
"learning_rate": 1.570337729572632e-06,
"loss": 0.5374500751495361,
"step": 1280
},
{
"epoch": 1.0791245791245792,
"grad_norm": 7.345931529998779,
"learning_rate": 1.5689126295201738e-06,
"loss": 0.3302645683288574,
"step": 1282
},
{
"epoch": 1.0808080808080809,
"grad_norm": 4.141447067260742,
"learning_rate": 1.5674859138664076e-06,
"loss": 1.053006887435913,
"step": 1284
},
{
"epoch": 1.0824915824915824,
"grad_norm": 18.335811614990234,
"learning_rate": 1.5660575875269696e-06,
"loss": 0.9029141664505005,
"step": 1286
},
{
"epoch": 1.0841750841750841,
"grad_norm": 4.0398850440979,
"learning_rate": 1.5646276554230454e-06,
"loss": 0.5438280701637268,
"step": 1288
},
{
"epoch": 1.0858585858585859,
"grad_norm": 20.008378982543945,
"learning_rate": 1.563196122481352e-06,
"loss": 0.6676660776138306,
"step": 1290
},
{
"epoch": 1.0875420875420876,
"grad_norm": 3.3898210525512695,
"learning_rate": 1.5617629936341225e-06,
"loss": 1.1070988178253174,
"step": 1292
},
{
"epoch": 1.0892255892255893,
"grad_norm": 5.172207355499268,
"learning_rate": 1.5603282738190898e-06,
"loss": 0.7852774858474731,
"step": 1294
},
{
"epoch": 1.0909090909090908,
"grad_norm": 14.538901329040527,
"learning_rate": 1.5588919679794668e-06,
"loss": 0.583429753780365,
"step": 1296
},
{
"epoch": 1.0925925925925926,
"grad_norm": 6.987974166870117,
"learning_rate": 1.5574540810639312e-06,
"loss": 0.6342300176620483,
"step": 1298
},
{
"epoch": 1.0942760942760943,
"grad_norm": 13.806412696838379,
"learning_rate": 1.556014618026609e-06,
"loss": 0.6277361512184143,
"step": 1300
},
{
"epoch": 1.095959595959596,
"grad_norm": 11.233121871948242,
"learning_rate": 1.5545735838270556e-06,
"loss": 0.6347372531890869,
"step": 1302
},
{
"epoch": 1.0976430976430978,
"grad_norm": 4.906972885131836,
"learning_rate": 1.5531309834302403e-06,
"loss": 0.5694692134857178,
"step": 1304
},
{
"epoch": 1.0993265993265993,
"grad_norm": 13.255314826965332,
"learning_rate": 1.5516868218065283e-06,
"loss": 0.5988457798957825,
"step": 1306
},
{
"epoch": 1.101010101010101,
"grad_norm": 18.89320182800293,
"learning_rate": 1.5502411039316642e-06,
"loss": 0.5894651412963867,
"step": 1308
},
{
"epoch": 1.1026936026936027,
"grad_norm": 2.3720078468322754,
"learning_rate": 1.5487938347867542e-06,
"loss": 0.39072656631469727,
"step": 1310
},
{
"epoch": 1.1043771043771045,
"grad_norm": 3.8021674156188965,
"learning_rate": 1.5473450193582498e-06,
"loss": 1.1303743124008179,
"step": 1312
},
{
"epoch": 1.106060606060606,
"grad_norm": 12.77686882019043,
"learning_rate": 1.5458946626379293e-06,
"loss": 0.9466381072998047,
"step": 1314
},
{
"epoch": 1.1077441077441077,
"grad_norm": 16.367809295654297,
"learning_rate": 1.5444427696228822e-06,
"loss": 0.896185576915741,
"step": 1316
},
{
"epoch": 1.1094276094276094,
"grad_norm": 4.367947578430176,
"learning_rate": 1.5429893453154906e-06,
"loss": 0.9018317461013794,
"step": 1318
},
{
"epoch": 1.1111111111111112,
"grad_norm": 11.2949857711792,
"learning_rate": 1.5415343947234132e-06,
"loss": 0.5716771483421326,
"step": 1320
},
{
"epoch": 1.112794612794613,
"grad_norm": 3.638136386871338,
"learning_rate": 1.5400779228595663e-06,
"loss": 0.8265483379364014,
"step": 1322
},
{
"epoch": 1.1144781144781144,
"grad_norm": 23.661731719970703,
"learning_rate": 1.538619934742109e-06,
"loss": 0.5200953483581543,
"step": 1324
},
{
"epoch": 1.1161616161616161,
"grad_norm": 5.394420146942139,
"learning_rate": 1.5371604353944235e-06,
"loss": 0.8769002556800842,
"step": 1326
},
{
"epoch": 1.1178451178451179,
"grad_norm": 3.2108795642852783,
"learning_rate": 1.5356994298450989e-06,
"loss": 0.6526933312416077,
"step": 1328
},
{
"epoch": 1.1195286195286196,
"grad_norm": 6.397909164428711,
"learning_rate": 1.5342369231279145e-06,
"loss": 0.994263768196106,
"step": 1330
},
{
"epoch": 1.121212121212121,
"grad_norm": 5.88171911239624,
"learning_rate": 1.5327729202818212e-06,
"loss": 0.7015285491943359,
"step": 1332
},
{
"epoch": 1.1228956228956228,
"grad_norm": 2.6668052673339844,
"learning_rate": 1.5313074263509242e-06,
"loss": 1.0788037776947021,
"step": 1334
},
{
"epoch": 1.1245791245791246,
"grad_norm": 5.609066009521484,
"learning_rate": 1.5298404463844675e-06,
"loss": 0.5919516086578369,
"step": 1336
},
{
"epoch": 1.1262626262626263,
"grad_norm": 3.103581428527832,
"learning_rate": 1.5283719854368142e-06,
"loss": 0.6757215857505798,
"step": 1338
},
{
"epoch": 1.127946127946128,
"grad_norm": 2.8614747524261475,
"learning_rate": 1.5269020485674299e-06,
"loss": 0.4805062413215637,
"step": 1340
},
{
"epoch": 1.1296296296296295,
"grad_norm": 4.264964580535889,
"learning_rate": 1.5254306408408657e-06,
"loss": 0.8218073844909668,
"step": 1342
},
{
"epoch": 1.1313131313131313,
"grad_norm": 3.358206272125244,
"learning_rate": 1.5239577673267401e-06,
"loss": 1.1272187232971191,
"step": 1344
},
{
"epoch": 1.132996632996633,
"grad_norm": 5.68251371383667,
"learning_rate": 1.5224834330997222e-06,
"loss": 1.0079560279846191,
"step": 1346
},
{
"epoch": 1.1346801346801347,
"grad_norm": 5.610229969024658,
"learning_rate": 1.5210076432395138e-06,
"loss": 0.6960790157318115,
"step": 1348
},
{
"epoch": 1.1363636363636362,
"grad_norm": 6.409191608428955,
"learning_rate": 1.5195304028308324e-06,
"loss": 0.48329275846481323,
"step": 1350
},
{
"epoch": 1.138047138047138,
"grad_norm": 17.214502334594727,
"learning_rate": 1.5180517169633914e-06,
"loss": 0.2905687391757965,
"step": 1352
},
{
"epoch": 1.1397306397306397,
"grad_norm": 4.7634406089782715,
"learning_rate": 1.5165715907318874e-06,
"loss": 0.9956916570663452,
"step": 1354
},
{
"epoch": 1.1414141414141414,
"grad_norm": 3.8894872665405273,
"learning_rate": 1.5150900292359775e-06,
"loss": 1.0472840070724487,
"step": 1356
},
{
"epoch": 1.1430976430976432,
"grad_norm": 28.076671600341797,
"learning_rate": 1.513607037580264e-06,
"loss": 0.7530080676078796,
"step": 1358
},
{
"epoch": 1.144781144781145,
"grad_norm": 5.491020679473877,
"learning_rate": 1.5121226208742771e-06,
"loss": 0.6445476412773132,
"step": 1360
},
{
"epoch": 1.1464646464646464,
"grad_norm": 2.686913251876831,
"learning_rate": 1.5106367842324578e-06,
"loss": 0.8437654376029968,
"step": 1362
},
{
"epoch": 1.1481481481481481,
"grad_norm": 35.050662994384766,
"learning_rate": 1.5091495327741375e-06,
"loss": 0.8638776540756226,
"step": 1364
},
{
"epoch": 1.1498316498316499,
"grad_norm": 3.9783761501312256,
"learning_rate": 1.507660871623524e-06,
"loss": 0.7111606597900391,
"step": 1366
},
{
"epoch": 1.1515151515151516,
"grad_norm": 14.50291633605957,
"learning_rate": 1.5061708059096807e-06,
"loss": 0.764883279800415,
"step": 1368
},
{
"epoch": 1.1531986531986531,
"grad_norm": 2.154838800430298,
"learning_rate": 1.5046793407665114e-06,
"loss": 1.0397025346755981,
"step": 1370
},
{
"epoch": 1.1548821548821548,
"grad_norm": 2.365380048751831,
"learning_rate": 1.503186481332741e-06,
"loss": 1.0539653301239014,
"step": 1372
},
{
"epoch": 1.1565656565656566,
"grad_norm": 8.504420280456543,
"learning_rate": 1.5016922327518986e-06,
"loss": 0.4366611838340759,
"step": 1374
},
{
"epoch": 1.1582491582491583,
"grad_norm": 2.675044298171997,
"learning_rate": 1.5001966001722986e-06,
"loss": 0.398744136095047,
"step": 1376
},
{
"epoch": 1.15993265993266,
"grad_norm": 8.629570960998535,
"learning_rate": 1.4986995887470248e-06,
"loss": 0.8844636678695679,
"step": 1378
},
{
"epoch": 1.1616161616161615,
"grad_norm": 2.5665788650512695,
"learning_rate": 1.497201203633912e-06,
"loss": 0.6772328019142151,
"step": 1380
},
{
"epoch": 1.1632996632996633,
"grad_norm": 9.2289457321167,
"learning_rate": 1.4957014499955265e-06,
"loss": 0.5273948907852173,
"step": 1382
},
{
"epoch": 1.164983164983165,
"grad_norm": 4.406887054443359,
"learning_rate": 1.4942003329991513e-06,
"loss": 0.36302030086517334,
"step": 1384
},
{
"epoch": 1.1666666666666667,
"grad_norm": 14.721182823181152,
"learning_rate": 1.492697857816766e-06,
"loss": 0.5152138471603394,
"step": 1386
},
{
"epoch": 1.1683501683501682,
"grad_norm": 2.9244027137756348,
"learning_rate": 1.491194029625029e-06,
"loss": 0.6069843173027039,
"step": 1388
},
{
"epoch": 1.17003367003367,
"grad_norm": 5.622206687927246,
"learning_rate": 1.489688853605262e-06,
"loss": 0.8698340654373169,
"step": 1390
},
{
"epoch": 1.1717171717171717,
"grad_norm": 3.113487482070923,
"learning_rate": 1.4881823349434296e-06,
"loss": 0.8122848272323608,
"step": 1392
},
{
"epoch": 1.1734006734006734,
"grad_norm": 8.594972610473633,
"learning_rate": 1.4866744788301226e-06,
"loss": 0.681936502456665,
"step": 1394
},
{
"epoch": 1.1750841750841752,
"grad_norm": 2.1322364807128906,
"learning_rate": 1.485165290460539e-06,
"loss": 0.571365237236023,
"step": 1396
},
{
"epoch": 1.1767676767676767,
"grad_norm": 3.1892471313476562,
"learning_rate": 1.4836547750344688e-06,
"loss": 0.7035370469093323,
"step": 1398
},
{
"epoch": 1.1784511784511784,
"grad_norm": 15.387435913085938,
"learning_rate": 1.4821429377562725e-06,
"loss": 0.49107053875923157,
"step": 1400
},
{
"epoch": 1.1801346801346801,
"grad_norm": 2.782883644104004,
"learning_rate": 1.4806297838348653e-06,
"loss": 0.9246771931648254,
"step": 1402
},
{
"epoch": 1.1818181818181819,
"grad_norm": 5.081911563873291,
"learning_rate": 1.4791153184837e-06,
"loss": 0.7164801955223083,
"step": 1404
},
{
"epoch": 1.1835016835016834,
"grad_norm": 11.42972469329834,
"learning_rate": 1.4775995469207467e-06,
"loss": 0.6407367587089539,
"step": 1406
},
{
"epoch": 1.1851851851851851,
"grad_norm": 5.799728870391846,
"learning_rate": 1.476082474368476e-06,
"loss": 0.9986523389816284,
"step": 1408
},
{
"epoch": 1.1868686868686869,
"grad_norm": 4.796317100524902,
"learning_rate": 1.4745641060538407e-06,
"loss": 0.700546145439148,
"step": 1410
},
{
"epoch": 1.1885521885521886,
"grad_norm": 21.660324096679688,
"learning_rate": 1.4730444472082597e-06,
"loss": 0.741939902305603,
"step": 1412
},
{
"epoch": 1.1902356902356903,
"grad_norm": 3.5754830837249756,
"learning_rate": 1.471523503067596e-06,
"loss": 0.7933897972106934,
"step": 1414
},
{
"epoch": 1.1919191919191918,
"grad_norm": 6.275886535644531,
"learning_rate": 1.4700012788721431e-06,
"loss": 0.7294763326644897,
"step": 1416
},
{
"epoch": 1.1936026936026936,
"grad_norm": 11.374263763427734,
"learning_rate": 1.4684777798666028e-06,
"loss": 1.066422939300537,
"step": 1418
},
{
"epoch": 1.1952861952861953,
"grad_norm": 8.107324600219727,
"learning_rate": 1.4669530113000712e-06,
"loss": 0.8409990072250366,
"step": 1420
},
{
"epoch": 1.196969696969697,
"grad_norm": 5.618307590484619,
"learning_rate": 1.465426978426017e-06,
"loss": 0.750501275062561,
"step": 1422
},
{
"epoch": 1.1986531986531987,
"grad_norm": 3.1983511447906494,
"learning_rate": 1.4638996865022658e-06,
"loss": 0.611116886138916,
"step": 1424
},
{
"epoch": 1.2003367003367003,
"grad_norm": 7.185869216918945,
"learning_rate": 1.4623711407909802e-06,
"loss": 0.8342564105987549,
"step": 1426
},
{
"epoch": 1.202020202020202,
"grad_norm": 5.156131267547607,
"learning_rate": 1.4608413465586444e-06,
"loss": 0.528020441532135,
"step": 1428
},
{
"epoch": 1.2037037037037037,
"grad_norm": 4.284945964813232,
"learning_rate": 1.4593103090760426e-06,
"loss": 0.867672324180603,
"step": 1430
},
{
"epoch": 1.2053872053872055,
"grad_norm": 4.11072301864624,
"learning_rate": 1.4577780336182429e-06,
"loss": 0.6711719036102295,
"step": 1432
},
{
"epoch": 1.2070707070707072,
"grad_norm": 2.3299851417541504,
"learning_rate": 1.4562445254645793e-06,
"loss": 1.1435985565185547,
"step": 1434
},
{
"epoch": 1.2087542087542087,
"grad_norm": 7.548894882202148,
"learning_rate": 1.4547097898986332e-06,
"loss": 0.5709949731826782,
"step": 1436
},
{
"epoch": 1.2104377104377104,
"grad_norm": 12.143434524536133,
"learning_rate": 1.453173832208213e-06,
"loss": 0.40696626901626587,
"step": 1438
},
{
"epoch": 1.2121212121212122,
"grad_norm": 3.1169068813323975,
"learning_rate": 1.4516366576853406e-06,
"loss": 0.4268173575401306,
"step": 1440
},
{
"epoch": 1.2138047138047139,
"grad_norm": 4.227779388427734,
"learning_rate": 1.450098271626228e-06,
"loss": 0.7122896313667297,
"step": 1442
},
{
"epoch": 1.2154882154882154,
"grad_norm": 7.247793674468994,
"learning_rate": 1.448558679331263e-06,
"loss": 0.8614311814308167,
"step": 1444
},
{
"epoch": 1.2171717171717171,
"grad_norm": 6.6793212890625,
"learning_rate": 1.4470178861049886e-06,
"loss": 0.8972820043563843,
"step": 1446
},
{
"epoch": 1.2188552188552189,
"grad_norm": 4.615921974182129,
"learning_rate": 1.4454758972560863e-06,
"loss": 0.6717212200164795,
"step": 1448
},
{
"epoch": 1.2205387205387206,
"grad_norm": 4.018466949462891,
"learning_rate": 1.4439327180973556e-06,
"loss": 0.8775206208229065,
"step": 1450
},
{
"epoch": 1.2222222222222223,
"grad_norm": 4.282815456390381,
"learning_rate": 1.4423883539456987e-06,
"loss": 0.867609977722168,
"step": 1452
},
{
"epoch": 1.2239057239057238,
"grad_norm": 5.375484466552734,
"learning_rate": 1.4408428101220997e-06,
"loss": 0.6089876294136047,
"step": 1454
},
{
"epoch": 1.2255892255892256,
"grad_norm": 4.924765110015869,
"learning_rate": 1.439296091951607e-06,
"loss": 0.852953314781189,
"step": 1456
},
{
"epoch": 1.2272727272727273,
"grad_norm": 6.108055591583252,
"learning_rate": 1.4377482047633162e-06,
"loss": 0.8556865453720093,
"step": 1458
},
{
"epoch": 1.228956228956229,
"grad_norm": 7.242824077606201,
"learning_rate": 1.4361991538903495e-06,
"loss": 0.9425716400146484,
"step": 1460
},
{
"epoch": 1.2306397306397305,
"grad_norm": 8.90245532989502,
"learning_rate": 1.4346489446698388e-06,
"loss": 0.6341677904129028,
"step": 1462
},
{
"epoch": 1.2323232323232323,
"grad_norm": 4.452878475189209,
"learning_rate": 1.4330975824429076e-06,
"loss": 0.6499779224395752,
"step": 1464
},
{
"epoch": 1.234006734006734,
"grad_norm": 2.3086910247802734,
"learning_rate": 1.4315450725546516e-06,
"loss": 0.8102267384529114,
"step": 1466
},
{
"epoch": 1.2356902356902357,
"grad_norm": 4.407566070556641,
"learning_rate": 1.42999142035412e-06,
"loss": 0.9032129049301147,
"step": 1468
},
{
"epoch": 1.2373737373737375,
"grad_norm": 3.0299272537231445,
"learning_rate": 1.4284366311942985e-06,
"loss": 1.0671682357788086,
"step": 1470
},
{
"epoch": 1.239057239057239,
"grad_norm": 5.777866840362549,
"learning_rate": 1.42688071043209e-06,
"loss": 0.5841819047927856,
"step": 1472
},
{
"epoch": 1.2407407407407407,
"grad_norm": 11.622872352600098,
"learning_rate": 1.4253236634282964e-06,
"loss": 0.6392555236816406,
"step": 1474
},
{
"epoch": 1.2424242424242424,
"grad_norm": 25.52138328552246,
"learning_rate": 1.4237654955475997e-06,
"loss": 0.45820027589797974,
"step": 1476
},
{
"epoch": 1.2441077441077442,
"grad_norm": 7.492943286895752,
"learning_rate": 1.4222062121585438e-06,
"loss": 0.6932016611099243,
"step": 1478
},
{
"epoch": 1.2457912457912457,
"grad_norm": 4.440412998199463,
"learning_rate": 1.4206458186335158e-06,
"loss": 0.7317427396774292,
"step": 1480
},
{
"epoch": 1.2474747474747474,
"grad_norm": 3.7973439693450928,
"learning_rate": 1.4190843203487285e-06,
"loss": 0.7156742811203003,
"step": 1482
},
{
"epoch": 1.2491582491582491,
"grad_norm": 5.348301410675049,
"learning_rate": 1.4175217226842e-06,
"loss": 0.4319908320903778,
"step": 1484
},
{
"epoch": 1.2508417508417509,
"grad_norm": 3.68155574798584,
"learning_rate": 1.4159580310237368e-06,
"loss": 0.5716394186019897,
"step": 1486
},
{
"epoch": 1.2525252525252526,
"grad_norm": 12.937089920043945,
"learning_rate": 1.414393250754915e-06,
"loss": 0.7173076272010803,
"step": 1488
},
{
"epoch": 1.2542087542087543,
"grad_norm": 4.815293312072754,
"learning_rate": 1.4128273872690608e-06,
"loss": 0.6426496505737305,
"step": 1490
},
{
"epoch": 1.2558922558922558,
"grad_norm": 6.455201148986816,
"learning_rate": 1.4112604459612326e-06,
"loss": 0.7094147801399231,
"step": 1492
},
{
"epoch": 1.2575757575757576,
"grad_norm": 2.647298812866211,
"learning_rate": 1.4096924322302025e-06,
"loss": 0.7964801788330078,
"step": 1494
},
{
"epoch": 1.2592592592592593,
"grad_norm": 10.454304695129395,
"learning_rate": 1.4081233514784377e-06,
"loss": 0.6100042462348938,
"step": 1496
},
{
"epoch": 1.2609427609427608,
"grad_norm": 3.6101741790771484,
"learning_rate": 1.4065532091120815e-06,
"loss": 0.9467732906341553,
"step": 1498
},
{
"epoch": 1.2626262626262625,
"grad_norm": 4.737046718597412,
"learning_rate": 1.4049820105409354e-06,
"loss": 0.9984631538391113,
"step": 1500
},
{
"epoch": 1.2643097643097643,
"grad_norm": 7.123760223388672,
"learning_rate": 1.4034097611784388e-06,
"loss": 0.5069697499275208,
"step": 1502
},
{
"epoch": 1.265993265993266,
"grad_norm": 6.340135097503662,
"learning_rate": 1.4018364664416531e-06,
"loss": 0.7557004690170288,
"step": 1504
},
{
"epoch": 1.2676767676767677,
"grad_norm": 2.5414600372314453,
"learning_rate": 1.4002621317512402e-06,
"loss": 1.086498498916626,
"step": 1506
},
{
"epoch": 1.2693602693602695,
"grad_norm": 6.803100109100342,
"learning_rate": 1.3986867625314453e-06,
"loss": 1.1087901592254639,
"step": 1508
},
{
"epoch": 1.271043771043771,
"grad_norm": 17.501358032226562,
"learning_rate": 1.397110364210079e-06,
"loss": 0.5395207405090332,
"step": 1510
},
{
"epoch": 1.2727272727272727,
"grad_norm": 17.035667419433594,
"learning_rate": 1.395532942218496e-06,
"loss": 0.5006218552589417,
"step": 1512
},
{
"epoch": 1.2744107744107744,
"grad_norm": 13.554049491882324,
"learning_rate": 1.393954501991579e-06,
"loss": 0.597407341003418,
"step": 1514
},
{
"epoch": 1.2760942760942762,
"grad_norm": 5.359893321990967,
"learning_rate": 1.3923750489677192e-06,
"loss": 0.7979379892349243,
"step": 1516
},
{
"epoch": 1.2777777777777777,
"grad_norm": 3.440288782119751,
"learning_rate": 1.3907945885887963e-06,
"loss": 0.7031858563423157,
"step": 1518
},
{
"epoch": 1.2794612794612794,
"grad_norm": 2.3797640800476074,
"learning_rate": 1.389213126300161e-06,
"loss": 0.8979378342628479,
"step": 1520
},
{
"epoch": 1.2811447811447811,
"grad_norm": 14.381575584411621,
"learning_rate": 1.3876306675506176e-06,
"loss": 0.6173551082611084,
"step": 1522
},
{
"epoch": 1.2828282828282829,
"grad_norm": 22.606948852539062,
"learning_rate": 1.3860472177924008e-06,
"loss": 0.5981260538101196,
"step": 1524
},
{
"epoch": 1.2845117845117846,
"grad_norm": 9.574856758117676,
"learning_rate": 1.3844627824811623e-06,
"loss": 0.8161386847496033,
"step": 1526
},
{
"epoch": 1.2861952861952861,
"grad_norm": 23.1750431060791,
"learning_rate": 1.3828773670759476e-06,
"loss": 0.7269278764724731,
"step": 1528
},
{
"epoch": 1.2878787878787878,
"grad_norm": 4.434001922607422,
"learning_rate": 1.3812909770391808e-06,
"loss": 0.3289014399051666,
"step": 1530
},
{
"epoch": 1.2895622895622896,
"grad_norm": 4.015097141265869,
"learning_rate": 1.3797036178366422e-06,
"loss": 0.7394604086875916,
"step": 1532
},
{
"epoch": 1.2912457912457913,
"grad_norm": 2.247042179107666,
"learning_rate": 1.3781152949374526e-06,
"loss": 1.0114760398864746,
"step": 1534
},
{
"epoch": 1.2929292929292928,
"grad_norm": 10.264386177062988,
"learning_rate": 1.3765260138140523e-06,
"loss": 0.9329554438591003,
"step": 1536
},
{
"epoch": 1.2946127946127945,
"grad_norm": 7.6681647300720215,
"learning_rate": 1.3749357799421846e-06,
"loss": 0.5743855237960815,
"step": 1538
},
{
"epoch": 1.2962962962962963,
"grad_norm": 51.10832977294922,
"learning_rate": 1.3733445988008729e-06,
"loss": 0.6765563488006592,
"step": 1540
},
{
"epoch": 1.297979797979798,
"grad_norm": 7.140315055847168,
"learning_rate": 1.3717524758724065e-06,
"loss": 0.5998942255973816,
"step": 1542
},
{
"epoch": 1.2996632996632997,
"grad_norm": 5.197514533996582,
"learning_rate": 1.3701594166423182e-06,
"loss": 0.8821581602096558,
"step": 1544
},
{
"epoch": 1.3013468013468015,
"grad_norm": 6.277469158172607,
"learning_rate": 1.3685654265993682e-06,
"loss": 0.767001211643219,
"step": 1546
},
{
"epoch": 1.303030303030303,
"grad_norm": 7.22768497467041,
"learning_rate": 1.366970511235522e-06,
"loss": 0.7709823250770569,
"step": 1548
},
{
"epoch": 1.3047138047138047,
"grad_norm": 4.289220333099365,
"learning_rate": 1.3653746760459345e-06,
"loss": 0.5894149541854858,
"step": 1550
},
{
"epoch": 1.3063973063973064,
"grad_norm": 7.390477657318115,
"learning_rate": 1.3637779265289299e-06,
"loss": 0.8726404905319214,
"step": 1552
},
{
"epoch": 1.308080808080808,
"grad_norm": 10.008243560791016,
"learning_rate": 1.3621802681859812e-06,
"loss": 0.947807788848877,
"step": 1554
},
{
"epoch": 1.3097643097643097,
"grad_norm": 2.8453805446624756,
"learning_rate": 1.3605817065216944e-06,
"loss": 0.8847697973251343,
"step": 1556
},
{
"epoch": 1.3114478114478114,
"grad_norm": 7.134622573852539,
"learning_rate": 1.3589822470437864e-06,
"loss": 0.8395899534225464,
"step": 1558
},
{
"epoch": 1.3131313131313131,
"grad_norm": 22.481409072875977,
"learning_rate": 1.3573818952630683e-06,
"loss": 0.42701858282089233,
"step": 1560
},
{
"epoch": 1.3148148148148149,
"grad_norm": 8.535077095031738,
"learning_rate": 1.3557806566934256e-06,
"loss": 0.5510627627372742,
"step": 1562
},
{
"epoch": 1.3164983164983166,
"grad_norm": 14.953362464904785,
"learning_rate": 1.354178536851799e-06,
"loss": 0.5616642236709595,
"step": 1564
},
{
"epoch": 1.3181818181818181,
"grad_norm": 3.324460983276367,
"learning_rate": 1.3525755412581645e-06,
"loss": 1.04994535446167,
"step": 1566
},
{
"epoch": 1.3198653198653199,
"grad_norm": 11.0078706741333,
"learning_rate": 1.3509716754355174e-06,
"loss": 0.5438690185546875,
"step": 1568
},
{
"epoch": 1.3215488215488216,
"grad_norm": 9.554030418395996,
"learning_rate": 1.34936694490985e-06,
"loss": 0.901394248008728,
"step": 1570
},
{
"epoch": 1.3232323232323233,
"grad_norm": 9.29176139831543,
"learning_rate": 1.3477613552101344e-06,
"loss": 0.7927477359771729,
"step": 1572
},
{
"epoch": 1.3249158249158248,
"grad_norm": 3.3643555641174316,
"learning_rate": 1.3461549118683023e-06,
"loss": 0.6502416133880615,
"step": 1574
},
{
"epoch": 1.3265993265993266,
"grad_norm": 3.0709450244903564,
"learning_rate": 1.344547620419227e-06,
"loss": 0.9406764507293701,
"step": 1576
},
{
"epoch": 1.3282828282828283,
"grad_norm": 74.16036224365234,
"learning_rate": 1.3429394864007037e-06,
"loss": 0.6865894794464111,
"step": 1578
},
{
"epoch": 1.32996632996633,
"grad_norm": 14.486356735229492,
"learning_rate": 1.3413305153534313e-06,
"loss": 0.49478814005851746,
"step": 1580
},
{
"epoch": 1.3316498316498318,
"grad_norm": 64.50064849853516,
"learning_rate": 1.3397207128209916e-06,
"loss": 0.6601588726043701,
"step": 1582
},
{
"epoch": 1.3333333333333333,
"grad_norm": 2.4977774620056152,
"learning_rate": 1.3381100843498315e-06,
"loss": 0.9941089153289795,
"step": 1584
},
{
"epoch": 1.335016835016835,
"grad_norm": 5.635324478149414,
"learning_rate": 1.3364986354892442e-06,
"loss": 0.8192329406738281,
"step": 1586
},
{
"epoch": 1.3367003367003367,
"grad_norm": 3.7212777137756348,
"learning_rate": 1.3348863717913485e-06,
"loss": 0.4632367491722107,
"step": 1588
},
{
"epoch": 1.3383838383838385,
"grad_norm": 2.295429229736328,
"learning_rate": 1.3332732988110717e-06,
"loss": 0.6560972332954407,
"step": 1590
},
{
"epoch": 1.34006734006734,
"grad_norm": 14.497373580932617,
"learning_rate": 1.3316594221061293e-06,
"loss": 0.553842306137085,
"step": 1592
},
{
"epoch": 1.3417508417508417,
"grad_norm": 2.9581053256988525,
"learning_rate": 1.3300447472370047e-06,
"loss": 0.9532322883605957,
"step": 1594
},
{
"epoch": 1.3434343434343434,
"grad_norm": 19.73745346069336,
"learning_rate": 1.3284292797669325e-06,
"loss": 0.3680313229560852,
"step": 1596
},
{
"epoch": 1.3451178451178452,
"grad_norm": 3.8030846118927,
"learning_rate": 1.326813025261878e-06,
"loss": 0.8829873204231262,
"step": 1598
},
{
"epoch": 1.3468013468013469,
"grad_norm": 9.470124244689941,
"learning_rate": 1.3251959892905183e-06,
"loss": 0.7422173023223877,
"step": 1600
},
{
"epoch": 1.3484848484848486,
"grad_norm": 4.198265075683594,
"learning_rate": 1.3235781774242221e-06,
"loss": 0.6670169830322266,
"step": 1602
},
{
"epoch": 1.3501683501683501,
"grad_norm": 11.831036567687988,
"learning_rate": 1.321959595237032e-06,
"loss": 0.8272008895874023,
"step": 1604
},
{
"epoch": 1.3518518518518519,
"grad_norm": 4.924741744995117,
"learning_rate": 1.3203402483056457e-06,
"loss": 1.091449499130249,
"step": 1606
},
{
"epoch": 1.3535353535353536,
"grad_norm": 4.869316101074219,
"learning_rate": 1.3187201422093937e-06,
"loss": 0.8597755432128906,
"step": 1608
},
{
"epoch": 1.355218855218855,
"grad_norm": 9.370150566101074,
"learning_rate": 1.3170992825302231e-06,
"loss": 0.38254064321517944,
"step": 1610
},
{
"epoch": 1.3569023569023568,
"grad_norm": 5.126072883605957,
"learning_rate": 1.315477674852678e-06,
"loss": 0.9957524538040161,
"step": 1612
},
{
"epoch": 1.3585858585858586,
"grad_norm": 4.2908172607421875,
"learning_rate": 1.3138553247638793e-06,
"loss": 0.6559964418411255,
"step": 1614
},
{
"epoch": 1.3602693602693603,
"grad_norm": 9.646893501281738,
"learning_rate": 1.3122322378535052e-06,
"loss": 0.6425015330314636,
"step": 1616
},
{
"epoch": 1.361952861952862,
"grad_norm": 2.957890510559082,
"learning_rate": 1.310608419713773e-06,
"loss": 0.8944872617721558,
"step": 1618
},
{
"epoch": 1.3636363636363638,
"grad_norm": 3.4394900798797607,
"learning_rate": 1.3089838759394198e-06,
"loss": 0.6483921408653259,
"step": 1620
},
{
"epoch": 1.3653198653198653,
"grad_norm": 2.6076972484588623,
"learning_rate": 1.3073586121276824e-06,
"loss": 0.9400961995124817,
"step": 1622
},
{
"epoch": 1.367003367003367,
"grad_norm": 2.1458706855773926,
"learning_rate": 1.3057326338782782e-06,
"loss": 0.8825739622116089,
"step": 1624
},
{
"epoch": 1.3686868686868687,
"grad_norm": 21.359161376953125,
"learning_rate": 1.3041059467933864e-06,
"loss": 0.6030191779136658,
"step": 1626
},
{
"epoch": 1.3703703703703702,
"grad_norm": 19.883914947509766,
"learning_rate": 1.3024785564776287e-06,
"loss": 0.8803253173828125,
"step": 1628
},
{
"epoch": 1.372053872053872,
"grad_norm": 5.972216606140137,
"learning_rate": 1.3008504685380493e-06,
"loss": 0.8786773085594177,
"step": 1630
},
{
"epoch": 1.3737373737373737,
"grad_norm": 4.644904613494873,
"learning_rate": 1.2992216885840964e-06,
"loss": 1.0024290084838867,
"step": 1632
},
{
"epoch": 1.3754208754208754,
"grad_norm": 6.252418041229248,
"learning_rate": 1.297592222227602e-06,
"loss": 0.6154271364212036,
"step": 1634
},
{
"epoch": 1.3771043771043772,
"grad_norm": 5.154648780822754,
"learning_rate": 1.2959620750827637e-06,
"loss": 0.3709207773208618,
"step": 1636
},
{
"epoch": 1.378787878787879,
"grad_norm": 4.736825466156006,
"learning_rate": 1.2943312527661236e-06,
"loss": 0.5821201801300049,
"step": 1638
},
{
"epoch": 1.3804713804713804,
"grad_norm": 2.9232895374298096,
"learning_rate": 1.2926997608965515e-06,
"loss": 0.6593613624572754,
"step": 1640
},
{
"epoch": 1.3821548821548821,
"grad_norm": 3.258718729019165,
"learning_rate": 1.2910676050952232e-06,
"loss": 0.9339215755462646,
"step": 1642
},
{
"epoch": 1.3838383838383839,
"grad_norm": 2.4435172080993652,
"learning_rate": 1.2894347909856021e-06,
"loss": 1.130608081817627,
"step": 1644
},
{
"epoch": 1.3855218855218856,
"grad_norm": 5.7142791748046875,
"learning_rate": 1.2878013241934195e-06,
"loss": 0.7692638635635376,
"step": 1646
},
{
"epoch": 1.387205387205387,
"grad_norm": 2.420278310775757,
"learning_rate": 1.2861672103466564e-06,
"loss": 0.93665611743927,
"step": 1648
},
{
"epoch": 1.3888888888888888,
"grad_norm": 3.4516067504882812,
"learning_rate": 1.284532455075522e-06,
"loss": 0.8558226823806763,
"step": 1650
},
{
"epoch": 1.3905723905723906,
"grad_norm": 4.455197811126709,
"learning_rate": 1.2828970640124361e-06,
"loss": 1.1693918704986572,
"step": 1652
},
{
"epoch": 1.3922558922558923,
"grad_norm": 4.881862640380859,
"learning_rate": 1.281261042792009e-06,
"loss": 0.9461103677749634,
"step": 1654
},
{
"epoch": 1.393939393939394,
"grad_norm": 10.862548828125,
"learning_rate": 1.2796243970510232e-06,
"loss": 0.5996136665344238,
"step": 1656
},
{
"epoch": 1.3956228956228955,
"grad_norm": 3.589484930038452,
"learning_rate": 1.2779871324284106e-06,
"loss": 0.6074084043502808,
"step": 1658
},
{
"epoch": 1.3973063973063973,
"grad_norm": 11.17980670928955,
"learning_rate": 1.2763492545652373e-06,
"loss": 0.9331209659576416,
"step": 1660
},
{
"epoch": 1.398989898989899,
"grad_norm": 19.434432983398438,
"learning_rate": 1.2747107691046815e-06,
"loss": 0.7953930497169495,
"step": 1662
},
{
"epoch": 1.4006734006734007,
"grad_norm": 42.425941467285156,
"learning_rate": 1.2730716816920151e-06,
"loss": 0.7052454352378845,
"step": 1664
},
{
"epoch": 1.4023569023569022,
"grad_norm": 5.138425827026367,
"learning_rate": 1.271431997974584e-06,
"loss": 0.424437016248703,
"step": 1666
},
{
"epoch": 1.404040404040404,
"grad_norm": 9.087939262390137,
"learning_rate": 1.2697917236017886e-06,
"loss": 0.814346194267273,
"step": 1668
},
{
"epoch": 1.4057239057239057,
"grad_norm": 3.4287939071655273,
"learning_rate": 1.2681508642250637e-06,
"loss": 0.7924845218658447,
"step": 1670
},
{
"epoch": 1.4074074074074074,
"grad_norm": 2.349846601486206,
"learning_rate": 1.266509425497861e-06,
"loss": 0.7972933650016785,
"step": 1672
},
{
"epoch": 1.4090909090909092,
"grad_norm": 3.433432102203369,
"learning_rate": 1.2648674130756271e-06,
"loss": 1.136865258216858,
"step": 1674
},
{
"epoch": 1.410774410774411,
"grad_norm": 18.93527603149414,
"learning_rate": 1.2632248326157854e-06,
"loss": 0.4568125009536743,
"step": 1676
},
{
"epoch": 1.4124579124579124,
"grad_norm": 21.089004516601562,
"learning_rate": 1.2615816897777176e-06,
"loss": 0.9250065088272095,
"step": 1678
},
{
"epoch": 1.4141414141414141,
"grad_norm": 3.9571752548217773,
"learning_rate": 1.2599379902227419e-06,
"loss": 1.0160582065582275,
"step": 1680
},
{
"epoch": 1.4158249158249159,
"grad_norm": 2.4356608390808105,
"learning_rate": 1.258293739614094e-06,
"loss": 0.5913569927215576,
"step": 1682
},
{
"epoch": 1.4175084175084174,
"grad_norm": 14.787010192871094,
"learning_rate": 1.2566489436169101e-06,
"loss": 0.46613961458206177,
"step": 1684
},
{
"epoch": 1.4191919191919191,
"grad_norm": 11.936421394348145,
"learning_rate": 1.255003607898204e-06,
"loss": 0.6293203830718994,
"step": 1686
},
{
"epoch": 1.4208754208754208,
"grad_norm": 3.085696220397949,
"learning_rate": 1.2533577381268495e-06,
"loss": 1.1134471893310547,
"step": 1688
},
{
"epoch": 1.4225589225589226,
"grad_norm": 8.348203659057617,
"learning_rate": 1.2517113399735608e-06,
"loss": 0.5143088698387146,
"step": 1690
},
{
"epoch": 1.4242424242424243,
"grad_norm": 21.37081527709961,
"learning_rate": 1.250064419110872e-06,
"loss": 0.6192675828933716,
"step": 1692
},
{
"epoch": 1.425925925925926,
"grad_norm": 3.3926167488098145,
"learning_rate": 1.2484169812131184e-06,
"loss": 0.563998818397522,
"step": 1694
},
{
"epoch": 1.4276094276094276,
"grad_norm": 2.4411673545837402,
"learning_rate": 1.246769031956417e-06,
"loss": 1.2114120721817017,
"step": 1696
},
{
"epoch": 1.4292929292929293,
"grad_norm": 4.939236640930176,
"learning_rate": 1.245120577018646e-06,
"loss": 1.056166410446167,
"step": 1698
},
{
"epoch": 1.430976430976431,
"grad_norm": 3.1179447174072266,
"learning_rate": 1.2434716220794265e-06,
"loss": 0.8100858926773071,
"step": 1700
},
{
"epoch": 1.4326599326599325,
"grad_norm": 2.682645320892334,
"learning_rate": 1.2418221728201023e-06,
"loss": 0.8299959897994995,
"step": 1702
},
{
"epoch": 1.4343434343434343,
"grad_norm": 3.0754740238189697,
"learning_rate": 1.2401722349237198e-06,
"loss": 0.33164000511169434,
"step": 1704
},
{
"epoch": 1.436026936026936,
"grad_norm": 4.3346381187438965,
"learning_rate": 1.238521814075009e-06,
"loss": 0.4199884235858917,
"step": 1706
},
{
"epoch": 1.4377104377104377,
"grad_norm": 12.329163551330566,
"learning_rate": 1.236870915960365e-06,
"loss": 0.9520546197891235,
"step": 1708
},
{
"epoch": 1.4393939393939394,
"grad_norm": 2.5863959789276123,
"learning_rate": 1.2352195462678257e-06,
"loss": 1.0822396278381348,
"step": 1710
},
{
"epoch": 1.4410774410774412,
"grad_norm": 5.638743877410889,
"learning_rate": 1.2335677106870546e-06,
"loss": 0.9755090475082397,
"step": 1712
},
{
"epoch": 1.4427609427609427,
"grad_norm": 2.6220881938934326,
"learning_rate": 1.2319154149093202e-06,
"loss": 0.8935360312461853,
"step": 1714
},
{
"epoch": 1.4444444444444444,
"grad_norm": 10.807649612426758,
"learning_rate": 1.2302626646274773e-06,
"loss": 0.8985303044319153,
"step": 1716
},
{
"epoch": 1.4461279461279462,
"grad_norm": 3.802117109298706,
"learning_rate": 1.228609465535946e-06,
"loss": 0.6814161539077759,
"step": 1718
},
{
"epoch": 1.4478114478114479,
"grad_norm": 8.011700630187988,
"learning_rate": 1.2269558233306918e-06,
"loss": 0.7456521391868591,
"step": 1720
},
{
"epoch": 1.4494949494949494,
"grad_norm": 6.23107385635376,
"learning_rate": 1.2253017437092088e-06,
"loss": 0.589634358882904,
"step": 1722
},
{
"epoch": 1.4511784511784511,
"grad_norm": 3.2185349464416504,
"learning_rate": 1.2236472323704971e-06,
"loss": 0.7695318460464478,
"step": 1724
},
{
"epoch": 1.4528619528619529,
"grad_norm": 5.373349189758301,
"learning_rate": 1.221992295015044e-06,
"loss": 0.8508809208869934,
"step": 1726
},
{
"epoch": 1.4545454545454546,
"grad_norm": 6.226076602935791,
"learning_rate": 1.2203369373448053e-06,
"loss": 0.664426863193512,
"step": 1728
},
{
"epoch": 1.4562289562289563,
"grad_norm": 3.2036166191101074,
"learning_rate": 1.2186811650631847e-06,
"loss": 0.9715543389320374,
"step": 1730
},
{
"epoch": 1.457912457912458,
"grad_norm": 2.1510095596313477,
"learning_rate": 1.217024983875014e-06,
"loss": 1.2159640789031982,
"step": 1732
},
{
"epoch": 1.4595959595959596,
"grad_norm": 2.128190040588379,
"learning_rate": 1.2153683994865354e-06,
"loss": 0.8712791800498962,
"step": 1734
},
{
"epoch": 1.4612794612794613,
"grad_norm": 15.694469451904297,
"learning_rate": 1.213711417605378e-06,
"loss": 0.6612798571586609,
"step": 1736
},
{
"epoch": 1.462962962962963,
"grad_norm": 3.5540852546691895,
"learning_rate": 1.2120540439405418e-06,
"loss": 0.6000321507453918,
"step": 1738
},
{
"epoch": 1.4646464646464645,
"grad_norm": 5.9053730964660645,
"learning_rate": 1.2103962842023765e-06,
"loss": 1.0903751850128174,
"step": 1740
},
{
"epoch": 1.4663299663299663,
"grad_norm": 3.0747792720794678,
"learning_rate": 1.2087381441025624e-06,
"loss": 0.6912112236022949,
"step": 1742
},
{
"epoch": 1.468013468013468,
"grad_norm": 4.498322010040283,
"learning_rate": 1.2070796293540887e-06,
"loss": 0.5265808701515198,
"step": 1744
},
{
"epoch": 1.4696969696969697,
"grad_norm": 3.914283275604248,
"learning_rate": 1.2054207456712377e-06,
"loss": 0.9266606569290161,
"step": 1746
},
{
"epoch": 1.4713804713804715,
"grad_norm": 3.2208728790283203,
"learning_rate": 1.2037614987695609e-06,
"loss": 0.9809207916259766,
"step": 1748
},
{
"epoch": 1.4730639730639732,
"grad_norm": 4.662408828735352,
"learning_rate": 1.2021018943658623e-06,
"loss": 0.7404388189315796,
"step": 1750
},
{
"epoch": 1.4747474747474747,
"grad_norm": 2.950866460800171,
"learning_rate": 1.2004419381781779e-06,
"loss": 0.6600291728973389,
"step": 1752
},
{
"epoch": 1.4764309764309764,
"grad_norm": 7.190127372741699,
"learning_rate": 1.1987816359257543e-06,
"loss": 0.6781315803527832,
"step": 1754
},
{
"epoch": 1.4781144781144782,
"grad_norm": 9.120945930480957,
"learning_rate": 1.1971209933290318e-06,
"loss": 0.8286664485931396,
"step": 1756
},
{
"epoch": 1.4797979797979797,
"grad_norm": 46.43217468261719,
"learning_rate": 1.1954600161096226e-06,
"loss": 0.6408827900886536,
"step": 1758
},
{
"epoch": 1.4814814814814814,
"grad_norm": 3.931215286254883,
"learning_rate": 1.1937987099902927e-06,
"loss": 0.7160297632217407,
"step": 1760
},
{
"epoch": 1.4831649831649831,
"grad_norm": 2.768970251083374,
"learning_rate": 1.19213708069494e-06,
"loss": 0.9132235050201416,
"step": 1762
},
{
"epoch": 1.4848484848484849,
"grad_norm": 3.2081525325775146,
"learning_rate": 1.190475133948577e-06,
"loss": 0.8853850364685059,
"step": 1764
},
{
"epoch": 1.4865319865319866,
"grad_norm": 7.524960041046143,
"learning_rate": 1.1888128754773092e-06,
"loss": 0.6852905750274658,
"step": 1766
},
{
"epoch": 1.4882154882154883,
"grad_norm": 4.307741165161133,
"learning_rate": 1.1871503110083167e-06,
"loss": 0.7655327320098877,
"step": 1768
},
{
"epoch": 1.4898989898989898,
"grad_norm": 3.650569200515747,
"learning_rate": 1.1854874462698337e-06,
"loss": 0.9417293071746826,
"step": 1770
},
{
"epoch": 1.4915824915824916,
"grad_norm": 5.581574440002441,
"learning_rate": 1.1838242869911285e-06,
"loss": 0.3258330821990967,
"step": 1772
},
{
"epoch": 1.4932659932659933,
"grad_norm": 2.098912000656128,
"learning_rate": 1.182160838902485e-06,
"loss": 0.826897144317627,
"step": 1774
},
{
"epoch": 1.494949494949495,
"grad_norm": 7.627374172210693,
"learning_rate": 1.1804971077351818e-06,
"loss": 0.7514946460723877,
"step": 1776
},
{
"epoch": 1.4966329966329965,
"grad_norm": 3.7137930393218994,
"learning_rate": 1.1788330992214724e-06,
"loss": 0.8887453079223633,
"step": 1778
},
{
"epoch": 1.4983164983164983,
"grad_norm": 8.848133087158203,
"learning_rate": 1.1771688190945664e-06,
"loss": 0.9019075036048889,
"step": 1780
},
{
"epoch": 1.5,
"grad_norm": 8.9419584274292,
"learning_rate": 1.1755042730886093e-06,
"loss": 0.5869305729866028,
"step": 1782
},
{
"epoch": 1.5016835016835017,
"grad_norm": 2.39841365814209,
"learning_rate": 1.1738394669386621e-06,
"loss": 1.1196240186691284,
"step": 1784
},
{
"epoch": 1.5033670033670035,
"grad_norm": 6.431698322296143,
"learning_rate": 1.172174406380683e-06,
"loss": 0.807545006275177,
"step": 1786
},
{
"epoch": 1.5050505050505052,
"grad_norm": 3.8912956714630127,
"learning_rate": 1.170509097151506e-06,
"loss": 0.9450180530548096,
"step": 1788
},
{
"epoch": 1.5067340067340067,
"grad_norm": 22.158241271972656,
"learning_rate": 1.168843544988822e-06,
"loss": 0.6185091733932495,
"step": 1790
},
{
"epoch": 1.5084175084175084,
"grad_norm": 7.974305629730225,
"learning_rate": 1.1671777556311587e-06,
"loss": 0.6012750267982483,
"step": 1792
},
{
"epoch": 1.51010101010101,
"grad_norm": 2.431042432785034,
"learning_rate": 1.1655117348178619e-06,
"loss": 0.8983908891677856,
"step": 1794
},
{
"epoch": 1.5117845117845117,
"grad_norm": 10.86044692993164,
"learning_rate": 1.163845488289074e-06,
"loss": 0.8865917921066284,
"step": 1796
},
{
"epoch": 1.5134680134680134,
"grad_norm": 12.615477561950684,
"learning_rate": 1.1621790217857153e-06,
"loss": 0.9755824208259583,
"step": 1798
},
{
"epoch": 1.5151515151515151,
"grad_norm": 4.471153736114502,
"learning_rate": 1.1605123410494643e-06,
"loss": 0.678105890750885,
"step": 1800
},
{
"epoch": 1.5168350168350169,
"grad_norm": 2.3955981731414795,
"learning_rate": 1.1588454518227375e-06,
"loss": 1.0274368524551392,
"step": 1802
},
{
"epoch": 1.5185185185185186,
"grad_norm": 3.6730523109436035,
"learning_rate": 1.157178359848669e-06,
"loss": 0.9852594137191772,
"step": 1804
},
{
"epoch": 1.5202020202020203,
"grad_norm": 4.832586288452148,
"learning_rate": 1.155511070871093e-06,
"loss": 0.7990705966949463,
"step": 1806
},
{
"epoch": 1.5218855218855218,
"grad_norm": 7.295440196990967,
"learning_rate": 1.1538435906345213e-06,
"loss": 0.7585336565971375,
"step": 1808
},
{
"epoch": 1.5235690235690236,
"grad_norm": 5.79640531539917,
"learning_rate": 1.1521759248841237e-06,
"loss": 0.6978878974914551,
"step": 1810
},
{
"epoch": 1.5252525252525253,
"grad_norm": 3.875293016433716,
"learning_rate": 1.1505080793657124e-06,
"loss": 0.22595882415771484,
"step": 1812
},
{
"epoch": 1.5269360269360268,
"grad_norm": 3.867565870285034,
"learning_rate": 1.1488400598257157e-06,
"loss": 1.1055881977081299,
"step": 1814
},
{
"epoch": 1.5286195286195285,
"grad_norm": 50.10768127441406,
"learning_rate": 1.1471718720111629e-06,
"loss": 0.7640130519866943,
"step": 1816
},
{
"epoch": 1.5303030303030303,
"grad_norm": 20.99407196044922,
"learning_rate": 1.1455035216696634e-06,
"loss": 0.8898581266403198,
"step": 1818
},
{
"epoch": 1.531986531986532,
"grad_norm": 3.8618974685668945,
"learning_rate": 1.1438350145493853e-06,
"loss": 0.7621004581451416,
"step": 1820
},
{
"epoch": 1.5336700336700337,
"grad_norm": 5.8136162757873535,
"learning_rate": 1.1421663563990383e-06,
"loss": 0.7234241962432861,
"step": 1822
},
{
"epoch": 1.5353535353535355,
"grad_norm": 2.8319544792175293,
"learning_rate": 1.1404975529678515e-06,
"loss": 0.9921367168426514,
"step": 1824
},
{
"epoch": 1.5370370370370372,
"grad_norm": 2.6894915103912354,
"learning_rate": 1.1388286100055555e-06,
"loss": 0.841090738773346,
"step": 1826
},
{
"epoch": 1.5387205387205387,
"grad_norm": 2.3952138423919678,
"learning_rate": 1.1371595332623601e-06,
"loss": 0.8845152258872986,
"step": 1828
},
{
"epoch": 1.5404040404040404,
"grad_norm": 7.501322269439697,
"learning_rate": 1.1354903284889377e-06,
"loss": 0.7155517935752869,
"step": 1830
},
{
"epoch": 1.542087542087542,
"grad_norm": 7.9082136154174805,
"learning_rate": 1.133821001436401e-06,
"loss": 0.7049411535263062,
"step": 1832
},
{
"epoch": 1.5437710437710437,
"grad_norm": 2.185568332672119,
"learning_rate": 1.1321515578562835e-06,
"loss": 1.0648796558380127,
"step": 1834
},
{
"epoch": 1.5454545454545454,
"grad_norm": 17.329938888549805,
"learning_rate": 1.1304820035005211e-06,
"loss": 0.8813831806182861,
"step": 1836
},
{
"epoch": 1.5471380471380471,
"grad_norm": 1.5673277378082275,
"learning_rate": 1.1288123441214315e-06,
"loss": 0.45255547761917114,
"step": 1838
},
{
"epoch": 1.5488215488215489,
"grad_norm": 3.232985258102417,
"learning_rate": 1.1271425854716931e-06,
"loss": 0.6964028477668762,
"step": 1840
},
{
"epoch": 1.5505050505050506,
"grad_norm": 4.322386741638184,
"learning_rate": 1.125472733304327e-06,
"loss": 0.6157456636428833,
"step": 1842
},
{
"epoch": 1.5521885521885523,
"grad_norm": 4.216830730438232,
"learning_rate": 1.1238027933726776e-06,
"loss": 0.4383459687232971,
"step": 1844
},
{
"epoch": 1.5538720538720538,
"grad_norm": 3.0813772678375244,
"learning_rate": 1.122132771430389e-06,
"loss": 0.9130579233169556,
"step": 1846
},
{
"epoch": 1.5555555555555556,
"grad_norm": 4.2144975662231445,
"learning_rate": 1.1204626732313907e-06,
"loss": 0.9694530963897705,
"step": 1848
},
{
"epoch": 1.557239057239057,
"grad_norm": 3.75293231010437,
"learning_rate": 1.1187925045298732e-06,
"loss": 0.7483557462692261,
"step": 1850
},
{
"epoch": 1.5589225589225588,
"grad_norm": 7.035089015960693,
"learning_rate": 1.1171222710802704e-06,
"loss": 0.9532842040061951,
"step": 1852
},
{
"epoch": 1.5606060606060606,
"grad_norm": 4.142365455627441,
"learning_rate": 1.1154519786372392e-06,
"loss": 0.5940355658531189,
"step": 1854
},
{
"epoch": 1.5622895622895623,
"grad_norm": 1.9475144147872925,
"learning_rate": 1.1137816329556403e-06,
"loss": 0.6380103826522827,
"step": 1856
},
{
"epoch": 1.563973063973064,
"grad_norm": 2.4910194873809814,
"learning_rate": 1.112111239790517e-06,
"loss": 0.9142417907714844,
"step": 1858
},
{
"epoch": 1.5656565656565657,
"grad_norm": 5.697439193725586,
"learning_rate": 1.1104408048970765e-06,
"loss": 0.4324049949645996,
"step": 1860
},
{
"epoch": 1.5673400673400675,
"grad_norm": 7.662766456604004,
"learning_rate": 1.1087703340306707e-06,
"loss": 0.9493654370307922,
"step": 1862
},
{
"epoch": 1.569023569023569,
"grad_norm": 2.1827774047851562,
"learning_rate": 1.1070998329467738e-06,
"loss": 0.355845183134079,
"step": 1864
},
{
"epoch": 1.5707070707070707,
"grad_norm": 7.288192272186279,
"learning_rate": 1.1054293074009646e-06,
"loss": 1.0024428367614746,
"step": 1866
},
{
"epoch": 1.5723905723905722,
"grad_norm": 7.846567630767822,
"learning_rate": 1.1037587631489077e-06,
"loss": 0.600260853767395,
"step": 1868
},
{
"epoch": 1.574074074074074,
"grad_norm": 3.9028728008270264,
"learning_rate": 1.1020882059463297e-06,
"loss": 0.8100777268409729,
"step": 1870
},
{
"epoch": 1.5757575757575757,
"grad_norm": 4.646785736083984,
"learning_rate": 1.1004176415490036e-06,
"loss": 0.7916046380996704,
"step": 1872
},
{
"epoch": 1.5774410774410774,
"grad_norm": 2.543654680252075,
"learning_rate": 1.0987470757127267e-06,
"loss": 0.9251663684844971,
"step": 1874
},
{
"epoch": 1.5791245791245792,
"grad_norm": 21.24106788635254,
"learning_rate": 1.0970765141933012e-06,
"loss": 0.5762704610824585,
"step": 1876
},
{
"epoch": 1.5808080808080809,
"grad_norm": 2.501488447189331,
"learning_rate": 1.0954059627465144e-06,
"loss": 1.1238887310028076,
"step": 1878
},
{
"epoch": 1.5824915824915826,
"grad_norm": 5.235997200012207,
"learning_rate": 1.093735427128119e-06,
"loss": 0.7707400321960449,
"step": 1880
},
{
"epoch": 1.5841750841750841,
"grad_norm": 5.779091835021973,
"learning_rate": 1.092064913093813e-06,
"loss": 0.4793959856033325,
"step": 1882
},
{
"epoch": 1.5858585858585859,
"grad_norm": 7.471992015838623,
"learning_rate": 1.09039442639922e-06,
"loss": 0.5366681814193726,
"step": 1884
},
{
"epoch": 1.5875420875420876,
"grad_norm": 3.628077983856201,
"learning_rate": 1.0887239727998697e-06,
"loss": 0.6487268209457397,
"step": 1886
},
{
"epoch": 1.589225589225589,
"grad_norm": 3.7435550689697266,
"learning_rate": 1.0870535580511778e-06,
"loss": 0.996959388256073,
"step": 1888
},
{
"epoch": 1.5909090909090908,
"grad_norm": 4.557770252227783,
"learning_rate": 1.0853831879084254e-06,
"loss": 0.2108735740184784,
"step": 1890
},
{
"epoch": 1.5925925925925926,
"grad_norm": 4.259451389312744,
"learning_rate": 1.0837128681267409e-06,
"loss": 1.057731032371521,
"step": 1892
},
{
"epoch": 1.5942760942760943,
"grad_norm": 3.0099260807037354,
"learning_rate": 1.082042604461079e-06,
"loss": 0.8130640983581543,
"step": 1894
},
{
"epoch": 1.595959595959596,
"grad_norm": 7.435500144958496,
"learning_rate": 1.0803724026662e-06,
"loss": 0.9344555139541626,
"step": 1896
},
{
"epoch": 1.5976430976430978,
"grad_norm": 4.205924034118652,
"learning_rate": 1.0787022684966524e-06,
"loss": 0.8660852313041687,
"step": 1898
},
{
"epoch": 1.5993265993265995,
"grad_norm": 14.64234447479248,
"learning_rate": 1.0770322077067512e-06,
"loss": 0.7825689315795898,
"step": 1900
},
{
"epoch": 1.601010101010101,
"grad_norm": 2.525815725326538,
"learning_rate": 1.0753622260505582e-06,
"loss": 0.8996245265007019,
"step": 1902
},
{
"epoch": 1.6026936026936027,
"grad_norm": 5.750382423400879,
"learning_rate": 1.0736923292818631e-06,
"loss": 0.7357829213142395,
"step": 1904
},
{
"epoch": 1.6043771043771042,
"grad_norm": 2.830305814743042,
"learning_rate": 1.0720225231541629e-06,
"loss": 1.1233978271484375,
"step": 1906
},
{
"epoch": 1.606060606060606,
"grad_norm": 6.201582908630371,
"learning_rate": 1.0703528134206418e-06,
"loss": 0.9390593767166138,
"step": 1908
},
{
"epoch": 1.6077441077441077,
"grad_norm": 2.511575698852539,
"learning_rate": 1.0686832058341534e-06,
"loss": 0.5838450789451599,
"step": 1910
},
{
"epoch": 1.6094276094276094,
"grad_norm": 9.2995023727417,
"learning_rate": 1.0670137061471972e-06,
"loss": 0.5779824256896973,
"step": 1912
},
{
"epoch": 1.6111111111111112,
"grad_norm": 10.087990760803223,
"learning_rate": 1.0653443201119026e-06,
"loss": 0.7840274572372437,
"step": 1914
},
{
"epoch": 1.612794612794613,
"grad_norm": 3.4181957244873047,
"learning_rate": 1.063675053480007e-06,
"loss": 0.6986541152000427,
"step": 1916
},
{
"epoch": 1.6144781144781146,
"grad_norm": 29.79077911376953,
"learning_rate": 1.0620059120028363e-06,
"loss": 0.6631942987442017,
"step": 1918
},
{
"epoch": 1.6161616161616161,
"grad_norm": 7.215582370758057,
"learning_rate": 1.0603369014312848e-06,
"loss": 0.6879869699478149,
"step": 1920
},
{
"epoch": 1.6178451178451179,
"grad_norm": 2.632085084915161,
"learning_rate": 1.0586680275157966e-06,
"loss": 0.8899586200714111,
"step": 1922
},
{
"epoch": 1.6195286195286194,
"grad_norm": 2.167722225189209,
"learning_rate": 1.0569992960063445e-06,
"loss": 0.5768526792526245,
"step": 1924
},
{
"epoch": 1.621212121212121,
"grad_norm": 4.157503604888916,
"learning_rate": 1.0553307126524105e-06,
"loss": 0.6109682321548462,
"step": 1926
},
{
"epoch": 1.6228956228956228,
"grad_norm": 2.805830478668213,
"learning_rate": 1.0536622832029663e-06,
"loss": 0.741910457611084,
"step": 1928
},
{
"epoch": 1.6245791245791246,
"grad_norm": 8.529329299926758,
"learning_rate": 1.0519940134064535e-06,
"loss": 0.8265746831893921,
"step": 1930
},
{
"epoch": 1.6262626262626263,
"grad_norm": 3.2494988441467285,
"learning_rate": 1.0503259090107635e-06,
"loss": 0.664577841758728,
"step": 1932
},
{
"epoch": 1.627946127946128,
"grad_norm": 5.897353172302246,
"learning_rate": 1.0486579757632177e-06,
"loss": 0.9694902896881104,
"step": 1934
},
{
"epoch": 1.6296296296296298,
"grad_norm": 5.868167400360107,
"learning_rate": 1.046990219410548e-06,
"loss": 0.9580270648002625,
"step": 1936
},
{
"epoch": 1.6313131313131313,
"grad_norm": 5.813265323638916,
"learning_rate": 1.0453226456988766e-06,
"loss": 1.0353319644927979,
"step": 1938
},
{
"epoch": 1.632996632996633,
"grad_norm": 8.491958618164062,
"learning_rate": 1.0436552603736967e-06,
"loss": 0.8483461141586304,
"step": 1940
},
{
"epoch": 1.6346801346801347,
"grad_norm": 2.543708086013794,
"learning_rate": 1.0419880691798526e-06,
"loss": 1.0242235660552979,
"step": 1942
},
{
"epoch": 1.6363636363636362,
"grad_norm": 2.253805160522461,
"learning_rate": 1.040321077861519e-06,
"loss": 0.7730292677879333,
"step": 1944
},
{
"epoch": 1.638047138047138,
"grad_norm": 2.954116106033325,
"learning_rate": 1.0386542921621824e-06,
"loss": 0.4111822545528412,
"step": 1946
},
{
"epoch": 1.6397306397306397,
"grad_norm": 6.733564853668213,
"learning_rate": 1.036987717824621e-06,
"loss": 0.9653711318969727,
"step": 1948
},
{
"epoch": 1.6414141414141414,
"grad_norm": 4.305788993835449,
"learning_rate": 1.0353213605908854e-06,
"loss": 0.9876930713653564,
"step": 1950
},
{
"epoch": 1.6430976430976432,
"grad_norm": 5.421419143676758,
"learning_rate": 1.0336552262022756e-06,
"loss": 0.49330899119377136,
"step": 1952
},
{
"epoch": 1.644781144781145,
"grad_norm": 6.326197624206543,
"learning_rate": 1.0319893203993276e-06,
"loss": 0.42090070247650146,
"step": 1954
},
{
"epoch": 1.6464646464646466,
"grad_norm": 16.150659561157227,
"learning_rate": 1.0303236489217863e-06,
"loss": 0.22029098868370056,
"step": 1956
},
{
"epoch": 1.6481481481481481,
"grad_norm": 5.668072700500488,
"learning_rate": 1.0286582175085913e-06,
"loss": 0.6529502868652344,
"step": 1958
},
{
"epoch": 1.6498316498316499,
"grad_norm": 2.8413267135620117,
"learning_rate": 1.0269930318978552e-06,
"loss": 0.7630746960639954,
"step": 1960
},
{
"epoch": 1.6515151515151514,
"grad_norm": 10.319131851196289,
"learning_rate": 1.0253280978268421e-06,
"loss": 0.6666793823242188,
"step": 1962
},
{
"epoch": 1.6531986531986531,
"grad_norm": 9.414068222045898,
"learning_rate": 1.0236634210319507e-06,
"loss": 0.5435478687286377,
"step": 1964
},
{
"epoch": 1.6548821548821548,
"grad_norm": 12.622198104858398,
"learning_rate": 1.0219990072486938e-06,
"loss": 0.6335460543632507,
"step": 1966
},
{
"epoch": 1.6565656565656566,
"grad_norm": 1.7483079433441162,
"learning_rate": 1.020334862211676e-06,
"loss": 0.8370047211647034,
"step": 1968
},
{
"epoch": 1.6582491582491583,
"grad_norm": 12.047608375549316,
"learning_rate": 1.0186709916545775e-06,
"loss": 0.7684140205383301,
"step": 1970
},
{
"epoch": 1.65993265993266,
"grad_norm": 10.904447555541992,
"learning_rate": 1.0170074013101329e-06,
"loss": 0.9606258869171143,
"step": 1972
},
{
"epoch": 1.6616161616161618,
"grad_norm": 2.283515453338623,
"learning_rate": 1.0153440969101103e-06,
"loss": 0.7740556001663208,
"step": 1974
},
{
"epoch": 1.6632996632996633,
"grad_norm": 3.3896608352661133,
"learning_rate": 1.0136810841852937e-06,
"loss": 0.7479045391082764,
"step": 1976
},
{
"epoch": 1.664983164983165,
"grad_norm": 11.400617599487305,
"learning_rate": 1.0120183688654616e-06,
"loss": 0.743224024772644,
"step": 1978
},
{
"epoch": 1.6666666666666665,
"grad_norm": 1.4617348909378052,
"learning_rate": 1.0103559566793679e-06,
"loss": 0.7983130216598511,
"step": 1980
},
{
"epoch": 1.6683501683501682,
"grad_norm": 7.328155994415283,
"learning_rate": 1.0086938533547213e-06,
"loss": 0.5365386009216309,
"step": 1982
},
{
"epoch": 1.67003367003367,
"grad_norm": 12.693415641784668,
"learning_rate": 1.0070320646181684e-06,
"loss": 0.46709537506103516,
"step": 1984
},
{
"epoch": 1.6717171717171717,
"grad_norm": 3.179992437362671,
"learning_rate": 1.0053705961952697e-06,
"loss": 1.0043718814849854,
"step": 1986
},
{
"epoch": 1.6734006734006734,
"grad_norm": 2.304699182510376,
"learning_rate": 1.0037094538104832e-06,
"loss": 0.8764192461967468,
"step": 1988
},
{
"epoch": 1.6750841750841752,
"grad_norm": 2.90543270111084,
"learning_rate": 1.002048643187143e-06,
"loss": 0.6470460891723633,
"step": 1990
},
{
"epoch": 1.676767676767677,
"grad_norm": 3.9131369590759277,
"learning_rate": 1.0003881700474415e-06,
"loss": 1.0713807344436646,
"step": 1992
},
{
"epoch": 1.6784511784511784,
"grad_norm": 12.474353790283203,
"learning_rate": 9.987280401124063e-07,
"loss": 0.6647155284881592,
"step": 1994
},
{
"epoch": 1.6801346801346801,
"grad_norm": 2.7717926502227783,
"learning_rate": 9.970682591018842e-07,
"loss": 0.6175976991653442,
"step": 1996
},
{
"epoch": 1.6818181818181817,
"grad_norm": 1.6829837560653687,
"learning_rate": 9.95408832734519e-07,
"loss": 0.9222723245620728,
"step": 1998
},
{
"epoch": 1.6835016835016834,
"grad_norm": 61.439422607421875,
"learning_rate": 9.937497667277322e-07,
"loss": 0.7147092819213867,
"step": 2000
},
{
"epoch": 1.6851851851851851,
"grad_norm": 4.989965438842773,
"learning_rate": 9.92091066797705e-07,
"loss": 0.6293914914131165,
"step": 2002
},
{
"epoch": 1.6868686868686869,
"grad_norm": 11.067621231079102,
"learning_rate": 9.904327386593563e-07,
"loss": 0.652735710144043,
"step": 2004
},
{
"epoch": 1.6885521885521886,
"grad_norm": 7.8212666511535645,
"learning_rate": 9.887747880263236e-07,
"loss": 0.6376103162765503,
"step": 2006
},
{
"epoch": 1.6902356902356903,
"grad_norm": 3.7688381671905518,
"learning_rate": 9.871172206109458e-07,
"loss": 0.9424273371696472,
"step": 2008
},
{
"epoch": 1.691919191919192,
"grad_norm": 5.420353889465332,
"learning_rate": 9.854600421242396e-07,
"loss": 0.5027921199798584,
"step": 2010
},
{
"epoch": 1.6936026936026936,
"grad_norm": 4.543862819671631,
"learning_rate": 9.838032582758814e-07,
"loss": 0.82335364818573,
"step": 2012
},
{
"epoch": 1.6952861952861953,
"grad_norm": 3.9203450679779053,
"learning_rate": 9.821468747741893e-07,
"loss": 0.5697500705718994,
"step": 2014
},
{
"epoch": 1.696969696969697,
"grad_norm": 4.254537582397461,
"learning_rate": 9.804908973261012e-07,
"loss": 0.7458208799362183,
"step": 2016
},
{
"epoch": 1.6986531986531985,
"grad_norm": 35.745418548583984,
"learning_rate": 9.788353316371562e-07,
"loss": 0.7252602577209473,
"step": 2018
},
{
"epoch": 1.7003367003367003,
"grad_norm": 5.118950366973877,
"learning_rate": 9.771801834114748e-07,
"loss": 0.721235454082489,
"step": 2020
},
{
"epoch": 1.702020202020202,
"grad_norm": 8.20414924621582,
"learning_rate": 9.755254583517394e-07,
"loss": 1.0950629711151123,
"step": 2022
},
{
"epoch": 1.7037037037037037,
"grad_norm": 3.2535030841827393,
"learning_rate": 9.738711621591733e-07,
"loss": 0.7883695363998413,
"step": 2024
},
{
"epoch": 1.7053872053872055,
"grad_norm": 2.4924561977386475,
"learning_rate": 9.722173005335235e-07,
"loss": 0.8893304467201233,
"step": 2026
},
{
"epoch": 1.7070707070707072,
"grad_norm": 3.33543062210083,
"learning_rate": 9.705638791730391e-07,
"loss": 0.9973706007003784,
"step": 2028
},
{
"epoch": 1.708754208754209,
"grad_norm": 12.050497055053711,
"learning_rate": 9.689109037744522e-07,
"loss": 0.6256110668182373,
"step": 2030
},
{
"epoch": 1.7104377104377104,
"grad_norm": 7.641107082366943,
"learning_rate": 9.672583800329585e-07,
"loss": 0.4611208438873291,
"step": 2032
},
{
"epoch": 1.7121212121212122,
"grad_norm": 4.1710405349731445,
"learning_rate": 9.65606313642198e-07,
"loss": 0.8477398157119751,
"step": 2034
},
{
"epoch": 1.7138047138047137,
"grad_norm": 12.162333488464355,
"learning_rate": 9.63954710294234e-07,
"loss": 0.7969092130661011,
"step": 2036
},
{
"epoch": 1.7154882154882154,
"grad_norm": 6.495959281921387,
"learning_rate": 9.623035756795352e-07,
"loss": 0.41181480884552,
"step": 2038
},
{
"epoch": 1.7171717171717171,
"grad_norm": 5.608903408050537,
"learning_rate": 9.606529154869556e-07,
"loss": 0.45445549488067627,
"step": 2040
},
{
"epoch": 1.7188552188552189,
"grad_norm": 3.937591552734375,
"learning_rate": 9.590027354037134e-07,
"loss": 0.8946130275726318,
"step": 2042
},
{
"epoch": 1.7205387205387206,
"grad_norm": 3.99568247795105,
"learning_rate": 9.573530411153732e-07,
"loss": 0.8655031323432922,
"step": 2044
},
{
"epoch": 1.7222222222222223,
"grad_norm": 7.455286502838135,
"learning_rate": 9.557038383058265e-07,
"loss": 1.0632479190826416,
"step": 2046
},
{
"epoch": 1.723905723905724,
"grad_norm": 2.330151081085205,
"learning_rate": 9.540551326572709e-07,
"loss": 1.0349470376968384,
"step": 2048
},
{
"epoch": 1.7255892255892256,
"grad_norm": 6.064199924468994,
"learning_rate": 9.524069298501902e-07,
"loss": 0.41284000873565674,
"step": 2050
},
{
"epoch": 1.7272727272727273,
"grad_norm": 1.6751161813735962,
"learning_rate": 9.507592355633376e-07,
"loss": 1.0285980701446533,
"step": 2052
},
{
"epoch": 1.7289562289562288,
"grad_norm": 26.606491088867188,
"learning_rate": 9.491120554737126e-07,
"loss": 0.9353586435317993,
"step": 2054
},
{
"epoch": 1.7306397306397305,
"grad_norm": 4.331685543060303,
"learning_rate": 9.474653952565439e-07,
"loss": 0.7286108732223511,
"step": 2056
},
{
"epoch": 1.7323232323232323,
"grad_norm": 2.1677701473236084,
"learning_rate": 9.458192605852691e-07,
"loss": 1.0569818019866943,
"step": 2058
},
{
"epoch": 1.734006734006734,
"grad_norm": 2.619204521179199,
"learning_rate": 9.441736571315142e-07,
"loss": 0.620589554309845,
"step": 2060
},
{
"epoch": 1.7356902356902357,
"grad_norm": 5.867666721343994,
"learning_rate": 9.425285905650755e-07,
"loss": 0.9633854627609253,
"step": 2062
},
{
"epoch": 1.7373737373737375,
"grad_norm": 16.939653396606445,
"learning_rate": 9.408840665538999e-07,
"loss": 0.6605305671691895,
"step": 2064
},
{
"epoch": 1.7390572390572392,
"grad_norm": 2.5597705841064453,
"learning_rate": 9.392400907640645e-07,
"loss": 0.6780143976211548,
"step": 2066
},
{
"epoch": 1.7407407407407407,
"grad_norm": 14.445930480957031,
"learning_rate": 9.375966688597572e-07,
"loss": 0.8258605003356934,
"step": 2068
},
{
"epoch": 1.7424242424242424,
"grad_norm": 5.176375389099121,
"learning_rate": 9.359538065032586e-07,
"loss": 0.7047204971313477,
"step": 2070
},
{
"epoch": 1.7441077441077442,
"grad_norm": 9.773624420166016,
"learning_rate": 9.343115093549203e-07,
"loss": 0.6722849011421204,
"step": 2072
},
{
"epoch": 1.7457912457912457,
"grad_norm": 3.369567394256592,
"learning_rate": 9.32669783073147e-07,
"loss": 0.49055272340774536,
"step": 2074
},
{
"epoch": 1.7474747474747474,
"grad_norm": 16.458398818969727,
"learning_rate": 9.310286333143767e-07,
"loss": 1.0591087341308594,
"step": 2076
},
{
"epoch": 1.7491582491582491,
"grad_norm": 3.6667587757110596,
"learning_rate": 9.293880657330604e-07,
"loss": 0.8024224042892456,
"step": 2078
},
{
"epoch": 1.7508417508417509,
"grad_norm": 3.5527923107147217,
"learning_rate": 9.277480859816444e-07,
"loss": 0.9343531131744385,
"step": 2080
},
{
"epoch": 1.7525252525252526,
"grad_norm": 4.238471984863281,
"learning_rate": 9.261086997105487e-07,
"loss": 0.6490952968597412,
"step": 2082
},
{
"epoch": 1.7542087542087543,
"grad_norm": 2.784026861190796,
"learning_rate": 9.244699125681485e-07,
"loss": 1.1208921670913696,
"step": 2084
},
{
"epoch": 1.7558922558922558,
"grad_norm": 3.683945655822754,
"learning_rate": 9.228317302007556e-07,
"loss": 0.788274884223938,
"step": 2086
},
{
"epoch": 1.7575757575757576,
"grad_norm": 8.775335311889648,
"learning_rate": 9.211941582525968e-07,
"loss": 0.4447941184043884,
"step": 2088
},
{
"epoch": 1.7592592592592593,
"grad_norm": 35.036190032958984,
"learning_rate": 9.195572023657969e-07,
"loss": 0.5342724323272705,
"step": 2090
},
{
"epoch": 1.7609427609427608,
"grad_norm": 11.131832122802734,
"learning_rate": 9.179208681803579e-07,
"loss": 0.535330057144165,
"step": 2092
},
{
"epoch": 1.7626262626262625,
"grad_norm": 4.160572052001953,
"learning_rate": 9.162851613341389e-07,
"loss": 0.3984565734863281,
"step": 2094
},
{
"epoch": 1.7643097643097643,
"grad_norm": 3.6985437870025635,
"learning_rate": 9.146500874628391e-07,
"loss": 0.6421704292297363,
"step": 2096
},
{
"epoch": 1.765993265993266,
"grad_norm": 2.077662467956543,
"learning_rate": 9.130156521999757e-07,
"loss": 1.0149686336517334,
"step": 2098
},
{
"epoch": 1.7676767676767677,
"grad_norm": 2.065174102783203,
"learning_rate": 9.113818611768654e-07,
"loss": 0.8843855857849121,
"step": 2100
},
{
"epoch": 1.7693602693602695,
"grad_norm": 2.7010414600372314,
"learning_rate": 9.097487200226059e-07,
"loss": 0.8571631908416748,
"step": 2102
},
{
"epoch": 1.7710437710437712,
"grad_norm": 9.685044288635254,
"learning_rate": 9.081162343640561e-07,
"loss": 0.5381686687469482,
"step": 2104
},
{
"epoch": 1.7727272727272727,
"grad_norm": 3.8229737281799316,
"learning_rate": 9.064844098258153e-07,
"loss": 0.6796019077301025,
"step": 2106
},
{
"epoch": 1.7744107744107744,
"grad_norm": 6.055543899536133,
"learning_rate": 9.048532520302061e-07,
"loss": 0.8706216812133789,
"step": 2108
},
{
"epoch": 1.776094276094276,
"grad_norm": 7.083333969116211,
"learning_rate": 9.032227665972534e-07,
"loss": 0.5699350237846375,
"step": 2110
},
{
"epoch": 1.7777777777777777,
"grad_norm": 2.0101730823516846,
"learning_rate": 9.015929591446651e-07,
"loss": 0.8485995531082153,
"step": 2112
},
{
"epoch": 1.7794612794612794,
"grad_norm": 2.6497552394866943,
"learning_rate": 8.999638352878142e-07,
"loss": 0.8866308927536011,
"step": 2114
},
{
"epoch": 1.7811447811447811,
"grad_norm": 2.9094290733337402,
"learning_rate": 8.983354006397177e-07,
"loss": 0.9138184785842896,
"step": 2116
},
{
"epoch": 1.7828282828282829,
"grad_norm": 2.6958985328674316,
"learning_rate": 8.96707660811018e-07,
"loss": 0.9850746989250183,
"step": 2118
},
{
"epoch": 1.7845117845117846,
"grad_norm": 21.947837829589844,
"learning_rate": 8.950806214099638e-07,
"loss": 0.6375728249549866,
"step": 2120
},
{
"epoch": 1.7861952861952863,
"grad_norm": 4.920895099639893,
"learning_rate": 8.934542880423903e-07,
"loss": 0.5961431860923767,
"step": 2122
},
{
"epoch": 1.7878787878787878,
"grad_norm": 2.1890132427215576,
"learning_rate": 8.918286663117005e-07,
"loss": 0.659866452217102,
"step": 2124
},
{
"epoch": 1.7895622895622896,
"grad_norm": 10.51028823852539,
"learning_rate": 8.902037618188449e-07,
"loss": 0.6706059575080872,
"step": 2126
},
{
"epoch": 1.791245791245791,
"grad_norm": 6.073541164398193,
"learning_rate": 8.885795801623035e-07,
"loss": 0.6864989995956421,
"step": 2128
},
{
"epoch": 1.7929292929292928,
"grad_norm": 19.274333953857422,
"learning_rate": 8.869561269380652e-07,
"loss": 0.674058198928833,
"step": 2130
},
{
"epoch": 1.7946127946127945,
"grad_norm": 3.4625072479248047,
"learning_rate": 8.853334077396098e-07,
"loss": 0.5736150741577148,
"step": 2132
},
{
"epoch": 1.7962962962962963,
"grad_norm": 1.9551900625228882,
"learning_rate": 8.837114281578872e-07,
"loss": 0.6773728728294373,
"step": 2134
},
{
"epoch": 1.797979797979798,
"grad_norm": 6.7064208984375,
"learning_rate": 8.820901937813003e-07,
"loss": 0.347098171710968,
"step": 2136
},
{
"epoch": 1.7996632996632997,
"grad_norm": 1.6629834175109863,
"learning_rate": 8.804697101956828e-07,
"loss": 0.9595216512680054,
"step": 2138
},
{
"epoch": 1.8013468013468015,
"grad_norm": 3.6944870948791504,
"learning_rate": 8.78849982984283e-07,
"loss": 0.7999200820922852,
"step": 2140
},
{
"epoch": 1.803030303030303,
"grad_norm": 3.7662339210510254,
"learning_rate": 8.772310177277427e-07,
"loss": 0.7555183172225952,
"step": 2142
},
{
"epoch": 1.8047138047138047,
"grad_norm": 2.7332985401153564,
"learning_rate": 8.756128200040782e-07,
"loss": 0.7414171099662781,
"step": 2144
},
{
"epoch": 1.8063973063973064,
"grad_norm": 5.167442798614502,
"learning_rate": 8.739953953886614e-07,
"loss": 0.904849112033844,
"step": 2146
},
{
"epoch": 1.808080808080808,
"grad_norm": 7.448000907897949,
"learning_rate": 8.72378749454201e-07,
"loss": 0.8806520104408264,
"step": 2148
},
{
"epoch": 1.8097643097643097,
"grad_norm": 2.8185012340545654,
"learning_rate": 8.707628877707221e-07,
"loss": 0.9877094030380249,
"step": 2150
},
{
"epoch": 1.8114478114478114,
"grad_norm": 3.56538987159729,
"learning_rate": 8.691478159055483e-07,
"loss": 0.9566267728805542,
"step": 2152
},
{
"epoch": 1.8131313131313131,
"grad_norm": 6.516078472137451,
"learning_rate": 8.675335394232819e-07,
"loss": 0.8102941513061523,
"step": 2154
},
{
"epoch": 1.8148148148148149,
"grad_norm": 5.387680530548096,
"learning_rate": 8.659200638857845e-07,
"loss": 0.655036449432373,
"step": 2156
},
{
"epoch": 1.8164983164983166,
"grad_norm": 4.142063140869141,
"learning_rate": 8.643073948521576e-07,
"loss": 0.44311749935150146,
"step": 2158
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.9489187002182007,
"learning_rate": 8.626955378787256e-07,
"loss": 0.8758860230445862,
"step": 2160
},
{
"epoch": 1.8198653198653199,
"grad_norm": 8.752238273620605,
"learning_rate": 8.610844985190127e-07,
"loss": 0.7219128608703613,
"step": 2162
},
{
"epoch": 1.8215488215488216,
"grad_norm": 8.243671417236328,
"learning_rate": 8.594742823237287e-07,
"loss": 0.8195970058441162,
"step": 2164
},
{
"epoch": 1.823232323232323,
"grad_norm": 8.444494247436523,
"learning_rate": 8.578648948407452e-07,
"loss": 0.9344632625579834,
"step": 2166
},
{
"epoch": 1.8249158249158248,
"grad_norm": 1.946562647819519,
"learning_rate": 8.562563416150794e-07,
"loss": 0.8328951597213745,
"step": 2168
},
{
"epoch": 1.8265993265993266,
"grad_norm": 4.5011749267578125,
"learning_rate": 8.546486281888739e-07,
"loss": 0.5535922050476074,
"step": 2170
},
{
"epoch": 1.8282828282828283,
"grad_norm": 10.435762405395508,
"learning_rate": 8.53041760101378e-07,
"loss": 0.733657956123352,
"step": 2172
},
{
"epoch": 1.82996632996633,
"grad_norm": 6.214064121246338,
"learning_rate": 8.51435742888928e-07,
"loss": 0.40798521041870117,
"step": 2174
},
{
"epoch": 1.8316498316498318,
"grad_norm": 4.490242958068848,
"learning_rate": 8.498305820849296e-07,
"loss": 0.45203477144241333,
"step": 2176
},
{
"epoch": 1.8333333333333335,
"grad_norm": 6.816056251525879,
"learning_rate": 8.482262832198365e-07,
"loss": 0.6513058543205261,
"step": 2178
},
{
"epoch": 1.835016835016835,
"grad_norm": 2.1644816398620605,
"learning_rate": 8.46622851821134e-07,
"loss": 0.7746816277503967,
"step": 2180
},
{
"epoch": 1.8367003367003367,
"grad_norm": 11.113990783691406,
"learning_rate": 8.450202934133174e-07,
"loss": 0.4632836580276489,
"step": 2182
},
{
"epoch": 1.8383838383838382,
"grad_norm": 4.4734086990356445,
"learning_rate": 8.434186135178749e-07,
"loss": 0.899796724319458,
"step": 2184
},
{
"epoch": 1.84006734006734,
"grad_norm": 2.3766531944274902,
"learning_rate": 8.418178176532674e-07,
"loss": 0.90257328748703,
"step": 2186
},
{
"epoch": 1.8417508417508417,
"grad_norm": 13.302746772766113,
"learning_rate": 8.402179113349106e-07,
"loss": 0.8778829574584961,
"step": 2188
},
{
"epoch": 1.8434343434343434,
"grad_norm": 10.324798583984375,
"learning_rate": 8.386189000751544e-07,
"loss": 0.5610869526863098,
"step": 2190
},
{
"epoch": 1.8451178451178452,
"grad_norm": 3.937783718109131,
"learning_rate": 8.370207893832661e-07,
"loss": 0.7988660335540771,
"step": 2192
},
{
"epoch": 1.8468013468013469,
"grad_norm": 7.830168724060059,
"learning_rate": 8.354235847654092e-07,
"loss": 0.6106054782867432,
"step": 2194
},
{
"epoch": 1.8484848484848486,
"grad_norm": 7.153279781341553,
"learning_rate": 8.338272917246252e-07,
"loss": 0.7764344215393066,
"step": 2196
},
{
"epoch": 1.8501683501683501,
"grad_norm": 6.39476203918457,
"learning_rate": 8.322319157608158e-07,
"loss": 0.48035871982574463,
"step": 2198
},
{
"epoch": 1.8518518518518519,
"grad_norm": 7.486396312713623,
"learning_rate": 8.306374623707222e-07,
"loss": 0.9800804853439331,
"step": 2200
},
{
"epoch": 1.8535353535353534,
"grad_norm": 3.6824681758880615,
"learning_rate": 8.29043937047907e-07,
"loss": 0.7192468643188477,
"step": 2202
},
{
"epoch": 1.855218855218855,
"grad_norm": 6.612771987915039,
"learning_rate": 8.274513452827361e-07,
"loss": 0.5936028957366943,
"step": 2204
},
{
"epoch": 1.8569023569023568,
"grad_norm": 3.079265832901001,
"learning_rate": 8.258596925623578e-07,
"loss": 0.9140318632125854,
"step": 2206
},
{
"epoch": 1.8585858585858586,
"grad_norm": 10.242953300476074,
"learning_rate": 8.242689843706852e-07,
"loss": 0.713873028755188,
"step": 2208
},
{
"epoch": 1.8602693602693603,
"grad_norm": 26.58353042602539,
"learning_rate": 8.226792261883777e-07,
"loss": 0.29191094636917114,
"step": 2210
},
{
"epoch": 1.861952861952862,
"grad_norm": 6.435546398162842,
"learning_rate": 8.210904234928213e-07,
"loss": 0.8298804759979248,
"step": 2212
},
{
"epoch": 1.8636363636363638,
"grad_norm": 2.913339853286743,
"learning_rate": 8.195025817581092e-07,
"loss": 1.0796676874160767,
"step": 2214
},
{
"epoch": 1.8653198653198653,
"grad_norm": 42.50606155395508,
"learning_rate": 8.179157064550246e-07,
"loss": 0.3906444311141968,
"step": 2216
},
{
"epoch": 1.867003367003367,
"grad_norm": 13.17294692993164,
"learning_rate": 8.163298030510208e-07,
"loss": 0.5464171171188354,
"step": 2218
},
{
"epoch": 1.8686868686868687,
"grad_norm": 17.247772216796875,
"learning_rate": 8.147448770102019e-07,
"loss": 0.48076120018959045,
"step": 2220
},
{
"epoch": 1.8703703703703702,
"grad_norm": 5.142391681671143,
"learning_rate": 8.131609337933054e-07,
"loss": 0.6968168616294861,
"step": 2222
},
{
"epoch": 1.872053872053872,
"grad_norm": 4.890412330627441,
"learning_rate": 8.115779788576818e-07,
"loss": 0.9484931230545044,
"step": 2224
},
{
"epoch": 1.8737373737373737,
"grad_norm": 4.0591044425964355,
"learning_rate": 8.099960176572768e-07,
"loss": 0.5798113346099854,
"step": 2226
},
{
"epoch": 1.8754208754208754,
"grad_norm": 16.09890365600586,
"learning_rate": 8.08415055642613e-07,
"loss": 0.35563382506370544,
"step": 2228
},
{
"epoch": 1.8771043771043772,
"grad_norm": 6.097412109375,
"learning_rate": 8.068350982607693e-07,
"loss": 1.0293006896972656,
"step": 2230
},
{
"epoch": 1.878787878787879,
"grad_norm": 3.246103525161743,
"learning_rate": 8.052561509553633e-07,
"loss": 0.9102228879928589,
"step": 2232
},
{
"epoch": 1.8804713804713806,
"grad_norm": 6.635921001434326,
"learning_rate": 8.03678219166533e-07,
"loss": 0.515903115272522,
"step": 2234
},
{
"epoch": 1.8821548821548821,
"grad_norm": 5.258808135986328,
"learning_rate": 8.021013083309181e-07,
"loss": 0.7250782251358032,
"step": 2236
},
{
"epoch": 1.8838383838383839,
"grad_norm": 27.69781494140625,
"learning_rate": 8.005254238816392e-07,
"loss": 0.9729253053665161,
"step": 2238
},
{
"epoch": 1.8855218855218854,
"grad_norm": 2.754936933517456,
"learning_rate": 7.989505712482814e-07,
"loss": 1.1490654945373535,
"step": 2240
},
{
"epoch": 1.887205387205387,
"grad_norm": 1.9234169721603394,
"learning_rate": 7.973767558568749e-07,
"loss": 0.9823436737060547,
"step": 2242
},
{
"epoch": 1.8888888888888888,
"grad_norm": 2.8880441188812256,
"learning_rate": 7.95803983129876e-07,
"loss": 0.8976832032203674,
"step": 2244
},
{
"epoch": 1.8905723905723906,
"grad_norm": 4.514529228210449,
"learning_rate": 7.942322584861476e-07,
"loss": 0.9340039491653442,
"step": 2246
},
{
"epoch": 1.8922558922558923,
"grad_norm": 7.478911876678467,
"learning_rate": 7.926615873409435e-07,
"loss": 0.8636904954910278,
"step": 2248
},
{
"epoch": 1.893939393939394,
"grad_norm": 2.7240192890167236,
"learning_rate": 7.910919751058863e-07,
"loss": 0.9821701049804688,
"step": 2250
},
{
"epoch": 1.8956228956228958,
"grad_norm": 2.6539080142974854,
"learning_rate": 7.895234271889502e-07,
"loss": 1.1389049291610718,
"step": 2252
},
{
"epoch": 1.8973063973063973,
"grad_norm": 2.555716037750244,
"learning_rate": 7.879559489944431e-07,
"loss": 0.8757186532020569,
"step": 2254
},
{
"epoch": 1.898989898989899,
"grad_norm": 3.2359490394592285,
"learning_rate": 7.86389545922987e-07,
"loss": 0.7967367172241211,
"step": 2256
},
{
"epoch": 1.9006734006734005,
"grad_norm": 2.5815160274505615,
"learning_rate": 7.848242233714992e-07,
"loss": 0.9813891649246216,
"step": 2258
},
{
"epoch": 1.9023569023569022,
"grad_norm": 5.316218852996826,
"learning_rate": 7.832599867331751e-07,
"loss": 0.6991989612579346,
"step": 2260
},
{
"epoch": 1.904040404040404,
"grad_norm": 3.514714241027832,
"learning_rate": 7.816968413974676e-07,
"loss": 0.7938976883888245,
"step": 2262
},
{
"epoch": 1.9057239057239057,
"grad_norm": 6.5592474937438965,
"learning_rate": 7.801347927500701e-07,
"loss": 0.46941909193992615,
"step": 2264
},
{
"epoch": 1.9074074074074074,
"grad_norm": 11.761022567749023,
"learning_rate": 7.785738461728975e-07,
"loss": 0.7285200953483582,
"step": 2266
},
{
"epoch": 1.9090909090909092,
"grad_norm": 7.991189002990723,
"learning_rate": 7.770140070440679e-07,
"loss": 0.6555970907211304,
"step": 2268
},
{
"epoch": 1.910774410774411,
"grad_norm": 4.922752857208252,
"learning_rate": 7.754552807378827e-07,
"loss": 0.7720062136650085,
"step": 2270
},
{
"epoch": 1.9124579124579124,
"grad_norm": 2.78389573097229,
"learning_rate": 7.738976726248105e-07,
"loss": 0.8745548725128174,
"step": 2272
},
{
"epoch": 1.9141414141414141,
"grad_norm": 10.283120155334473,
"learning_rate": 7.723411880714663e-07,
"loss": 0.7076643705368042,
"step": 2274
},
{
"epoch": 1.9158249158249159,
"grad_norm": 13.527719497680664,
"learning_rate": 7.707858324405945e-07,
"loss": 0.8855887651443481,
"step": 2276
},
{
"epoch": 1.9175084175084174,
"grad_norm": 13.780444145202637,
"learning_rate": 7.692316110910495e-07,
"loss": 0.5699777603149414,
"step": 2278
},
{
"epoch": 1.9191919191919191,
"grad_norm": 7.046093940734863,
"learning_rate": 7.676785293777779e-07,
"loss": 0.20726297795772552,
"step": 2280
},
{
"epoch": 1.9208754208754208,
"grad_norm": 5.450412750244141,
"learning_rate": 7.661265926517997e-07,
"loss": 0.960862398147583,
"step": 2282
},
{
"epoch": 1.9225589225589226,
"grad_norm": 13.540059089660645,
"learning_rate": 7.6457580626019e-07,
"loss": 0.44127357006073,
"step": 2284
},
{
"epoch": 1.9242424242424243,
"grad_norm": 5.831504821777344,
"learning_rate": 7.630261755460598e-07,
"loss": 0.5103174448013306,
"step": 2286
},
{
"epoch": 1.925925925925926,
"grad_norm": 7.158233165740967,
"learning_rate": 7.614777058485398e-07,
"loss": 0.9973621368408203,
"step": 2288
},
{
"epoch": 1.9276094276094278,
"grad_norm": 3.2046473026275635,
"learning_rate": 7.59930402502759e-07,
"loss": 0.6976436972618103,
"step": 2290
},
{
"epoch": 1.9292929292929293,
"grad_norm": 9.439109802246094,
"learning_rate": 7.58384270839829e-07,
"loss": 0.4523466229438782,
"step": 2292
},
{
"epoch": 1.930976430976431,
"grad_norm": 6.197632789611816,
"learning_rate": 7.568393161868234e-07,
"loss": 0.9106472134590149,
"step": 2294
},
{
"epoch": 1.9326599326599325,
"grad_norm": 8.470841407775879,
"learning_rate": 7.552955438667612e-07,
"loss": 0.7909121513366699,
"step": 2296
},
{
"epoch": 1.9343434343434343,
"grad_norm": 3.3162317276000977,
"learning_rate": 7.537529591985879e-07,
"loss": 0.7960456609725952,
"step": 2298
},
{
"epoch": 1.936026936026936,
"grad_norm": 7.409903526306152,
"learning_rate": 7.522115674971564e-07,
"loss": 0.6709874868392944,
"step": 2300
},
{
"epoch": 1.9377104377104377,
"grad_norm": 8.22396183013916,
"learning_rate": 7.506713740732098e-07,
"loss": 1.1500425338745117,
"step": 2302
},
{
"epoch": 1.9393939393939394,
"grad_norm": 3.9755733013153076,
"learning_rate": 7.491323842333626e-07,
"loss": 0.9240370988845825,
"step": 2304
},
{
"epoch": 1.9410774410774412,
"grad_norm": 7.245258331298828,
"learning_rate": 7.47594603280082e-07,
"loss": 0.30636048316955566,
"step": 2306
},
{
"epoch": 1.942760942760943,
"grad_norm": 4.102907180786133,
"learning_rate": 7.460580365116704e-07,
"loss": 0.8063202500343323,
"step": 2308
},
{
"epoch": 1.9444444444444444,
"grad_norm": 2.798117160797119,
"learning_rate": 7.445226892222476e-07,
"loss": 1.042150855064392,
"step": 2310
},
{
"epoch": 1.9461279461279462,
"grad_norm": 11.515227317810059,
"learning_rate": 7.429885667017301e-07,
"loss": 0.9472934603691101,
"step": 2312
},
{
"epoch": 1.9478114478114477,
"grad_norm": 5.401071548461914,
"learning_rate": 7.41455674235816e-07,
"loss": 0.9147957563400269,
"step": 2314
},
{
"epoch": 1.9494949494949494,
"grad_norm": 3.730478048324585,
"learning_rate": 7.399240171059649e-07,
"loss": 0.7157914638519287,
"step": 2316
},
{
"epoch": 1.9511784511784511,
"grad_norm": 4.426076889038086,
"learning_rate": 7.383936005893798e-07,
"loss": 0.8011871576309204,
"step": 2318
},
{
"epoch": 1.9528619528619529,
"grad_norm": 6.439156532287598,
"learning_rate": 7.368644299589894e-07,
"loss": 0.8518431186676025,
"step": 2320
},
{
"epoch": 1.9545454545454546,
"grad_norm": 2.613004446029663,
"learning_rate": 7.353365104834304e-07,
"loss": 0.936795711517334,
"step": 2322
},
{
"epoch": 1.9562289562289563,
"grad_norm": 6.956838130950928,
"learning_rate": 7.338098474270277e-07,
"loss": 0.7357702851295471,
"step": 2324
},
{
"epoch": 1.957912457912458,
"grad_norm": 13.74077320098877,
"learning_rate": 7.322844460497783e-07,
"loss": 0.5305231809616089,
"step": 2326
},
{
"epoch": 1.9595959595959596,
"grad_norm": 2.220991373062134,
"learning_rate": 7.307603116073317e-07,
"loss": 0.9905499219894409,
"step": 2328
},
{
"epoch": 1.9612794612794613,
"grad_norm": 1.9964042901992798,
"learning_rate": 7.292374493509725e-07,
"loss": 1.0259349346160889,
"step": 2330
},
{
"epoch": 1.9629629629629628,
"grad_norm": 3.4638054370880127,
"learning_rate": 7.277158645276014e-07,
"loss": 0.9553219079971313,
"step": 2332
},
{
"epoch": 1.9646464646464645,
"grad_norm": 2.130671977996826,
"learning_rate": 7.261955623797189e-07,
"loss": 0.9786357283592224,
"step": 2334
},
{
"epoch": 1.9663299663299663,
"grad_norm": 2.262347459793091,
"learning_rate": 7.246765481454056e-07,
"loss": 0.8999519348144531,
"step": 2336
},
{
"epoch": 1.968013468013468,
"grad_norm": 8.985565185546875,
"learning_rate": 7.23158827058304e-07,
"loss": 1.0301485061645508,
"step": 2338
},
{
"epoch": 1.9696969696969697,
"grad_norm": 15.289015769958496,
"learning_rate": 7.216424043476022e-07,
"loss": 0.4213113784790039,
"step": 2340
},
{
"epoch": 1.9713804713804715,
"grad_norm": 5.483232498168945,
"learning_rate": 7.20127285238015e-07,
"loss": 0.6755249500274658,
"step": 2342
},
{
"epoch": 1.9730639730639732,
"grad_norm": 5.321086883544922,
"learning_rate": 7.186134749497645e-07,
"loss": 0.5112136602401733,
"step": 2344
},
{
"epoch": 1.9747474747474747,
"grad_norm": 6.330574989318848,
"learning_rate": 7.171009786985642e-07,
"loss": 0.7962218523025513,
"step": 2346
},
{
"epoch": 1.9764309764309764,
"grad_norm": 7.868488788604736,
"learning_rate": 7.155898016956008e-07,
"loss": 0.6971943378448486,
"step": 2348
},
{
"epoch": 1.9781144781144782,
"grad_norm": 51.063167572021484,
"learning_rate": 7.14079949147514e-07,
"loss": 0.6931584477424622,
"step": 2350
},
{
"epoch": 1.9797979797979797,
"grad_norm": 5.527878761291504,
"learning_rate": 7.125714262563814e-07,
"loss": 0.6461153030395508,
"step": 2352
},
{
"epoch": 1.9814814814814814,
"grad_norm": 3.8143720626831055,
"learning_rate": 7.110642382196996e-07,
"loss": 0.4134939908981323,
"step": 2354
},
{
"epoch": 1.9831649831649831,
"grad_norm": 2.772143840789795,
"learning_rate": 7.095583902303648e-07,
"loss": 1.014623999595642,
"step": 2356
},
{
"epoch": 1.9848484848484849,
"grad_norm": 2.1666996479034424,
"learning_rate": 7.080538874766573e-07,
"loss": 0.8629425764083862,
"step": 2358
},
{
"epoch": 1.9865319865319866,
"grad_norm": 1.9438031911849976,
"learning_rate": 7.06550735142222e-07,
"loss": 0.8896007537841797,
"step": 2360
},
{
"epoch": 1.9882154882154883,
"grad_norm": 6.1856369972229,
"learning_rate": 7.050489384060512e-07,
"loss": 0.6207383275032043,
"step": 2362
},
{
"epoch": 1.98989898989899,
"grad_norm": 2.3403923511505127,
"learning_rate": 7.035485024424666e-07,
"loss": 0.912721574306488,
"step": 2364
},
{
"epoch": 1.9915824915824916,
"grad_norm": 11.149336814880371,
"learning_rate": 7.020494324211017e-07,
"loss": 0.8143168687820435,
"step": 2366
},
{
"epoch": 1.9932659932659933,
"grad_norm": 2.9151461124420166,
"learning_rate": 7.005517335068827e-07,
"loss": 0.9495657682418823,
"step": 2368
},
{
"epoch": 1.9949494949494948,
"grad_norm": 1.9637680053710938,
"learning_rate": 6.99055410860013e-07,
"loss": 0.26862990856170654,
"step": 2370
},
{
"epoch": 1.9966329966329965,
"grad_norm": 17.319799423217773,
"learning_rate": 6.975604696359542e-07,
"loss": 0.5134755969047546,
"step": 2372
},
{
"epoch": 1.9983164983164983,
"grad_norm": 5.046746730804443,
"learning_rate": 6.960669149854068e-07,
"loss": 0.8662137985229492,
"step": 2374
},
{
"epoch": 2.0,
"grad_norm": 3.1785898208618164,
"learning_rate": 6.945747520542955e-07,
"loss": 0.8281479477882385,
"step": 2376
},
{
"epoch": 2.0016835016835017,
"grad_norm": 15.919424057006836,
"learning_rate": 6.930839859837496e-07,
"loss": 0.5921661853790283,
"step": 2378
},
{
"epoch": 2.0033670033670035,
"grad_norm": 13.652657508850098,
"learning_rate": 6.915946219100852e-07,
"loss": 1.0555100440979004,
"step": 2380
},
{
"epoch": 2.005050505050505,
"grad_norm": 5.170054912567139,
"learning_rate": 6.901066649647887e-07,
"loss": 0.6134198904037476,
"step": 2382
},
{
"epoch": 2.006734006734007,
"grad_norm": 3.480863094329834,
"learning_rate": 6.886201202744972e-07,
"loss": 0.48556286096572876,
"step": 2384
},
{
"epoch": 2.008417508417508,
"grad_norm": 1.9658989906311035,
"learning_rate": 6.871349929609826e-07,
"loss": 0.6283817291259766,
"step": 2386
},
{
"epoch": 2.01010101010101,
"grad_norm": 3.805121421813965,
"learning_rate": 6.856512881411343e-07,
"loss": 0.7825635671615601,
"step": 2388
},
{
"epoch": 2.0117845117845117,
"grad_norm": 3.4738574028015137,
"learning_rate": 6.841690109269386e-07,
"loss": 0.9271956086158752,
"step": 2390
},
{
"epoch": 2.0134680134680134,
"grad_norm": 6.440873622894287,
"learning_rate": 6.826881664254646e-07,
"loss": 0.6064585447311401,
"step": 2392
},
{
"epoch": 2.015151515151515,
"grad_norm": 5.510295391082764,
"learning_rate": 6.812087597388452e-07,
"loss": 0.610366940498352,
"step": 2394
},
{
"epoch": 2.016835016835017,
"grad_norm": 2.200218439102173,
"learning_rate": 6.79730795964258e-07,
"loss": 0.7530055046081543,
"step": 2396
},
{
"epoch": 2.0185185185185186,
"grad_norm": 9.030868530273438,
"learning_rate": 6.782542801939105e-07,
"loss": 0.7531571388244629,
"step": 2398
},
{
"epoch": 2.0202020202020203,
"grad_norm": 3.04939866065979,
"learning_rate": 6.767792175150211e-07,
"loss": 0.4959731698036194,
"step": 2400
},
{
"epoch": 2.021885521885522,
"grad_norm": 10.346657752990723,
"learning_rate": 6.753056130098009e-07,
"loss": 0.31336265802383423,
"step": 2402
},
{
"epoch": 2.0235690235690234,
"grad_norm": 2.892493486404419,
"learning_rate": 6.738334717554373e-07,
"loss": 0.7610318660736084,
"step": 2404
},
{
"epoch": 2.025252525252525,
"grad_norm": 13.560941696166992,
"learning_rate": 6.723627988240772e-07,
"loss": 0.6177215576171875,
"step": 2406
},
{
"epoch": 2.026936026936027,
"grad_norm": 2.220264196395874,
"learning_rate": 6.708935992828068e-07,
"loss": 0.6627448797225952,
"step": 2408
},
{
"epoch": 2.0286195286195285,
"grad_norm": 4.267128944396973,
"learning_rate": 6.694258781936369e-07,
"loss": 0.664837121963501,
"step": 2410
},
{
"epoch": 2.0303030303030303,
"grad_norm": 3.3940136432647705,
"learning_rate": 6.679596406134844e-07,
"loss": 0.8382737636566162,
"step": 2412
},
{
"epoch": 2.031986531986532,
"grad_norm": 2.147282838821411,
"learning_rate": 6.664948915941546e-07,
"loss": 0.5983447432518005,
"step": 2414
},
{
"epoch": 2.0336700336700337,
"grad_norm": 2.9526758193969727,
"learning_rate": 6.65031636182324e-07,
"loss": 0.8206237554550171,
"step": 2416
},
{
"epoch": 2.0353535353535355,
"grad_norm": 15.74380874633789,
"learning_rate": 6.635698794195237e-07,
"loss": 0.5850080847740173,
"step": 2418
},
{
"epoch": 2.037037037037037,
"grad_norm": 63.14246368408203,
"learning_rate": 6.621096263421202e-07,
"loss": 0.4533715844154358,
"step": 2420
},
{
"epoch": 2.038720538720539,
"grad_norm": 3.8994693756103516,
"learning_rate": 6.606508819813001e-07,
"loss": 0.7626893520355225,
"step": 2422
},
{
"epoch": 2.04040404040404,
"grad_norm": 3.742114543914795,
"learning_rate": 6.591936513630514e-07,
"loss": 0.17822477221488953,
"step": 2424
},
{
"epoch": 2.042087542087542,
"grad_norm": 12.309547424316406,
"learning_rate": 6.577379395081466e-07,
"loss": 0.38434261083602905,
"step": 2426
},
{
"epoch": 2.0437710437710437,
"grad_norm": 3.1989083290100098,
"learning_rate": 6.562837514321258e-07,
"loss": 0.5980604290962219,
"step": 2428
},
{
"epoch": 2.0454545454545454,
"grad_norm": 32.80799865722656,
"learning_rate": 6.548310921452784e-07,
"loss": 0.716747522354126,
"step": 2430
},
{
"epoch": 2.047138047138047,
"grad_norm": 4.070531368255615,
"learning_rate": 6.533799666526275e-07,
"loss": 0.6677117347717285,
"step": 2432
},
{
"epoch": 2.048821548821549,
"grad_norm": 4.801085472106934,
"learning_rate": 6.519303799539104e-07,
"loss": 0.7861591577529907,
"step": 2434
},
{
"epoch": 2.0505050505050506,
"grad_norm": 3.876065731048584,
"learning_rate": 6.504823370435633e-07,
"loss": 1.105973720550537,
"step": 2436
},
{
"epoch": 2.0521885521885523,
"grad_norm": 2.630798578262329,
"learning_rate": 6.490358429107038e-07,
"loss": 0.6676466464996338,
"step": 2438
},
{
"epoch": 2.053872053872054,
"grad_norm": 3.058680534362793,
"learning_rate": 6.47590902539112e-07,
"loss": 0.824833869934082,
"step": 2440
},
{
"epoch": 2.0555555555555554,
"grad_norm": 5.962945461273193,
"learning_rate": 6.461475209072161e-07,
"loss": 0.7032083868980408,
"step": 2442
},
{
"epoch": 2.057239057239057,
"grad_norm": 2.236006021499634,
"learning_rate": 6.44705702988073e-07,
"loss": 0.7378408908843994,
"step": 2444
},
{
"epoch": 2.058922558922559,
"grad_norm": 5.968637943267822,
"learning_rate": 6.432654537493518e-07,
"loss": 0.9346398115158081,
"step": 2446
},
{
"epoch": 2.0606060606060606,
"grad_norm": 2.306854009628296,
"learning_rate": 6.418267781533173e-07,
"loss": 0.7191810607910156,
"step": 2448
},
{
"epoch": 2.0622895622895623,
"grad_norm": 9.214242935180664,
"learning_rate": 6.403896811568124e-07,
"loss": 0.760452389717102,
"step": 2450
},
{
"epoch": 2.063973063973064,
"grad_norm": 5.0180792808532715,
"learning_rate": 6.389541677112407e-07,
"loss": 0.8763862252235413,
"step": 2452
},
{
"epoch": 2.0656565656565657,
"grad_norm": 4.374032974243164,
"learning_rate": 6.375202427625505e-07,
"loss": 0.6157338619232178,
"step": 2454
},
{
"epoch": 2.0673400673400675,
"grad_norm": 5.687982082366943,
"learning_rate": 6.360879112512159e-07,
"loss": 0.7349066138267517,
"step": 2456
},
{
"epoch": 2.069023569023569,
"grad_norm": 2.7759313583374023,
"learning_rate": 6.346571781122218e-07,
"loss": 0.6915404796600342,
"step": 2458
},
{
"epoch": 2.0707070707070705,
"grad_norm": 7.065815448760986,
"learning_rate": 6.332280482750466e-07,
"loss": 0.561396062374115,
"step": 2460
},
{
"epoch": 2.0723905723905722,
"grad_norm": 16.879159927368164,
"learning_rate": 6.318005266636428e-07,
"loss": 0.5830413103103638,
"step": 2462
},
{
"epoch": 2.074074074074074,
"grad_norm": 9.45602798461914,
"learning_rate": 6.303746181964234e-07,
"loss": 0.6078395843505859,
"step": 2464
},
{
"epoch": 2.0757575757575757,
"grad_norm": 6.788721561431885,
"learning_rate": 6.289503277862438e-07,
"loss": 0.7341784238815308,
"step": 2466
},
{
"epoch": 2.0774410774410774,
"grad_norm": 2.810659408569336,
"learning_rate": 6.275276603403824e-07,
"loss": 0.5312877893447876,
"step": 2468
},
{
"epoch": 2.079124579124579,
"grad_norm": 5.600820541381836,
"learning_rate": 6.26106620760528e-07,
"loss": 0.961767315864563,
"step": 2470
},
{
"epoch": 2.080808080808081,
"grad_norm": 2.6611502170562744,
"learning_rate": 6.246872139427602e-07,
"loss": 0.9193134307861328,
"step": 2472
},
{
"epoch": 2.0824915824915826,
"grad_norm": 5.997580528259277,
"learning_rate": 6.232694447775316e-07,
"loss": 0.4731786549091339,
"step": 2474
},
{
"epoch": 2.0841750841750843,
"grad_norm": 6.405127048492432,
"learning_rate": 6.218533181496541e-07,
"loss": 0.57915198802948,
"step": 2476
},
{
"epoch": 2.0858585858585856,
"grad_norm": 2.907135486602783,
"learning_rate": 6.204388389382804e-07,
"loss": 0.8042079210281372,
"step": 2478
},
{
"epoch": 2.0875420875420874,
"grad_norm": 1.9114086627960205,
"learning_rate": 6.190260120168855e-07,
"loss": 0.6323788166046143,
"step": 2480
},
{
"epoch": 2.089225589225589,
"grad_norm": 1.966032862663269,
"learning_rate": 6.17614842253253e-07,
"loss": 0.594678521156311,
"step": 2482
},
{
"epoch": 2.090909090909091,
"grad_norm": 2.6483774185180664,
"learning_rate": 6.162053345094569e-07,
"loss": 0.9705860018730164,
"step": 2484
},
{
"epoch": 2.0925925925925926,
"grad_norm": 5.268326282501221,
"learning_rate": 6.147974936418436e-07,
"loss": 0.6276801228523254,
"step": 2486
},
{
"epoch": 2.0942760942760943,
"grad_norm": 2.3642375469207764,
"learning_rate": 6.133913245010181e-07,
"loss": 0.6014080047607422,
"step": 2488
},
{
"epoch": 2.095959595959596,
"grad_norm": 5.21682071685791,
"learning_rate": 6.119868319318244e-07,
"loss": 0.7621322870254517,
"step": 2490
},
{
"epoch": 2.0976430976430978,
"grad_norm": 3.3880903720855713,
"learning_rate": 6.105840207733302e-07,
"loss": 0.8144615888595581,
"step": 2492
},
{
"epoch": 2.0993265993265995,
"grad_norm": 2.8465569019317627,
"learning_rate": 6.091828958588101e-07,
"loss": 0.499761700630188,
"step": 2494
},
{
"epoch": 2.101010101010101,
"grad_norm": 4.338362216949463,
"learning_rate": 6.077834620157296e-07,
"loss": 0.9024825096130371,
"step": 2496
},
{
"epoch": 2.1026936026936025,
"grad_norm": 2.8251841068267822,
"learning_rate": 6.063857240657264e-07,
"loss": 0.4348450303077698,
"step": 2498
},
{
"epoch": 2.1043771043771042,
"grad_norm": 4.72477388381958,
"learning_rate": 6.049896868245962e-07,
"loss": 0.613303005695343,
"step": 2500
},
{
"epoch": 2.106060606060606,
"grad_norm": 2.4342687129974365,
"learning_rate": 6.035953551022748e-07,
"loss": 0.9862151145935059,
"step": 2502
},
{
"epoch": 2.1077441077441077,
"grad_norm": 11.250151634216309,
"learning_rate": 6.022027337028212e-07,
"loss": 0.7949624061584473,
"step": 2504
},
{
"epoch": 2.1094276094276094,
"grad_norm": 8.595945358276367,
"learning_rate": 6.008118274244025e-07,
"loss": 0.844199538230896,
"step": 2506
},
{
"epoch": 2.111111111111111,
"grad_norm": 5.953275680541992,
"learning_rate": 5.994226410592762e-07,
"loss": 0.47989651560783386,
"step": 2508
},
{
"epoch": 2.112794612794613,
"grad_norm": 5.53914213180542,
"learning_rate": 5.980351793937734e-07,
"loss": 0.5320888757705688,
"step": 2510
},
{
"epoch": 2.1144781144781146,
"grad_norm": 19.394433975219727,
"learning_rate": 5.966494472082832e-07,
"loss": 0.7170990705490112,
"step": 2512
},
{
"epoch": 2.1161616161616164,
"grad_norm": 7.686086654663086,
"learning_rate": 5.952654492772369e-07,
"loss": 0.431751549243927,
"step": 2514
},
{
"epoch": 2.1178451178451176,
"grad_norm": 3.4454784393310547,
"learning_rate": 5.938831903690887e-07,
"loss": 0.840388834476471,
"step": 2516
},
{
"epoch": 2.1195286195286194,
"grad_norm": 4.63939094543457,
"learning_rate": 5.925026752463027e-07,
"loss": 0.17465031147003174,
"step": 2518
},
{
"epoch": 2.121212121212121,
"grad_norm": 23.637449264526367,
"learning_rate": 5.911239086653345e-07,
"loss": 0.3789297044277191,
"step": 2520
},
{
"epoch": 2.122895622895623,
"grad_norm": 4.005544185638428,
"learning_rate": 5.89746895376614e-07,
"loss": 0.20194318890571594,
"step": 2522
},
{
"epoch": 2.1245791245791246,
"grad_norm": 3.130404233932495,
"learning_rate": 5.883716401245329e-07,
"loss": 0.40525293350219727,
"step": 2524
},
{
"epoch": 2.1262626262626263,
"grad_norm": 14.998170852661133,
"learning_rate": 5.869981476474235e-07,
"loss": 0.2688121795654297,
"step": 2526
},
{
"epoch": 2.127946127946128,
"grad_norm": 18.963912963867188,
"learning_rate": 5.856264226775451e-07,
"loss": 0.3136770725250244,
"step": 2528
},
{
"epoch": 2.1296296296296298,
"grad_norm": 3.191150188446045,
"learning_rate": 5.842564699410676e-07,
"loss": 0.5626152753829956,
"step": 2530
},
{
"epoch": 2.1313131313131315,
"grad_norm": 3.6382803916931152,
"learning_rate": 5.828882941580548e-07,
"loss": 0.7779805660247803,
"step": 2532
},
{
"epoch": 2.1329966329966332,
"grad_norm": 3.3205113410949707,
"learning_rate": 5.815219000424475e-07,
"loss": 0.40261930227279663,
"step": 2534
},
{
"epoch": 2.1346801346801345,
"grad_norm": 5.565113544464111,
"learning_rate": 5.801572923020486e-07,
"loss": 0.6595053672790527,
"step": 2536
},
{
"epoch": 2.1363636363636362,
"grad_norm": 9.94298267364502,
"learning_rate": 5.787944756385061e-07,
"loss": 0.32748013734817505,
"step": 2538
},
{
"epoch": 2.138047138047138,
"grad_norm": 0.7888699173927307,
"learning_rate": 5.774334547472963e-07,
"loss": 0.34032267332077026,
"step": 2540
},
{
"epoch": 2.1397306397306397,
"grad_norm": 8.096704483032227,
"learning_rate": 5.760742343177091e-07,
"loss": 0.7002683281898499,
"step": 2542
},
{
"epoch": 2.1414141414141414,
"grad_norm": 3.1933655738830566,
"learning_rate": 5.747168190328313e-07,
"loss": 0.10309363156557083,
"step": 2544
},
{
"epoch": 2.143097643097643,
"grad_norm": 2.4028244018554688,
"learning_rate": 5.73361213569529e-07,
"loss": 0.323750376701355,
"step": 2546
},
{
"epoch": 2.144781144781145,
"grad_norm": 1.90052330493927,
"learning_rate": 5.720074225984335e-07,
"loss": 0.6766308546066284,
"step": 2548
},
{
"epoch": 2.1464646464646466,
"grad_norm": 2.1108572483062744,
"learning_rate": 5.706554507839247e-07,
"loss": 0.8565983772277832,
"step": 2550
},
{
"epoch": 2.148148148148148,
"grad_norm": 2.810182571411133,
"learning_rate": 5.693053027841139e-07,
"loss": 0.4966258108615875,
"step": 2552
},
{
"epoch": 2.1498316498316496,
"grad_norm": 75.41299438476562,
"learning_rate": 5.679569832508294e-07,
"loss": 0.2292374223470688,
"step": 2554
},
{
"epoch": 2.1515151515151514,
"grad_norm": 3.583876132965088,
"learning_rate": 5.666104968295993e-07,
"loss": 0.4831843674182892,
"step": 2556
},
{
"epoch": 2.153198653198653,
"grad_norm": 3.617044448852539,
"learning_rate": 5.652658481596355e-07,
"loss": 0.5890083312988281,
"step": 2558
},
{
"epoch": 2.154882154882155,
"grad_norm": 4.1594061851501465,
"learning_rate": 5.639230418738186e-07,
"loss": 0.416708379983902,
"step": 2560
},
{
"epoch": 2.1565656565656566,
"grad_norm": 4.285228252410889,
"learning_rate": 5.625820825986818e-07,
"loss": 0.477688729763031,
"step": 2562
},
{
"epoch": 2.1582491582491583,
"grad_norm": 3.6317057609558105,
"learning_rate": 5.61242974954393e-07,
"loss": 0.6931259632110596,
"step": 2564
},
{
"epoch": 2.15993265993266,
"grad_norm": 7.4866943359375,
"learning_rate": 5.599057235547422e-07,
"loss": 0.4877997040748596,
"step": 2566
},
{
"epoch": 2.1616161616161618,
"grad_norm": 5.388299465179443,
"learning_rate": 5.585703330071232e-07,
"loss": 0.391178697347641,
"step": 2568
},
{
"epoch": 2.1632996632996635,
"grad_norm": 2.264526605606079,
"learning_rate": 5.572368079125177e-07,
"loss": 0.9337778687477112,
"step": 2570
},
{
"epoch": 2.164983164983165,
"grad_norm": 3.827529191970825,
"learning_rate": 5.559051528654812e-07,
"loss": 1.0406713485717773,
"step": 2572
},
{
"epoch": 2.1666666666666665,
"grad_norm": 6.1171650886535645,
"learning_rate": 5.545753724541259e-07,
"loss": 0.7416504621505737,
"step": 2574
},
{
"epoch": 2.1683501683501682,
"grad_norm": 9.12820053100586,
"learning_rate": 5.532474712601041e-07,
"loss": 0.1839454025030136,
"step": 2576
},
{
"epoch": 2.17003367003367,
"grad_norm": 13.084949493408203,
"learning_rate": 5.519214538585945e-07,
"loss": 0.6754062175750732,
"step": 2578
},
{
"epoch": 2.1717171717171717,
"grad_norm": 8.969803810119629,
"learning_rate": 5.505973248182854e-07,
"loss": 0.22235676646232605,
"step": 2580
},
{
"epoch": 2.1734006734006734,
"grad_norm": 6.776020526885986,
"learning_rate": 5.492750887013576e-07,
"loss": 0.41986188292503357,
"step": 2582
},
{
"epoch": 2.175084175084175,
"grad_norm": 15.121447563171387,
"learning_rate": 5.479547500634716e-07,
"loss": 0.31534767150878906,
"step": 2584
},
{
"epoch": 2.176767676767677,
"grad_norm": 4.160110950469971,
"learning_rate": 5.466363134537495e-07,
"loss": 0.6025125980377197,
"step": 2586
},
{
"epoch": 2.1784511784511786,
"grad_norm": 12.059831619262695,
"learning_rate": 5.453197834147596e-07,
"loss": 0.5609304904937744,
"step": 2588
},
{
"epoch": 2.18013468013468,
"grad_norm": 8.022695541381836,
"learning_rate": 5.440051644825024e-07,
"loss": 0.6940740346908569,
"step": 2590
},
{
"epoch": 2.1818181818181817,
"grad_norm": 11.945213317871094,
"learning_rate": 5.426924611863932e-07,
"loss": 0.523178219795227,
"step": 2592
},
{
"epoch": 2.1835016835016834,
"grad_norm": 12.750484466552734,
"learning_rate": 5.413816780492464e-07,
"loss": 0.3450314402580261,
"step": 2594
},
{
"epoch": 2.185185185185185,
"grad_norm": 5.865060329437256,
"learning_rate": 5.400728195872627e-07,
"loss": 0.6967110633850098,
"step": 2596
},
{
"epoch": 2.186868686868687,
"grad_norm": 2.9188671112060547,
"learning_rate": 5.387658903100093e-07,
"loss": 0.8298006057739258,
"step": 2598
},
{
"epoch": 2.1885521885521886,
"grad_norm": 8.126681327819824,
"learning_rate": 5.374608947204078e-07,
"loss": 0.5891833901405334,
"step": 2600
},
{
"epoch": 2.1902356902356903,
"grad_norm": 1.921739101409912,
"learning_rate": 5.361578373147173e-07,
"loss": 0.7303223609924316,
"step": 2602
},
{
"epoch": 2.191919191919192,
"grad_norm": 10.952816009521484,
"learning_rate": 5.348567225825182e-07,
"loss": 0.785490870475769,
"step": 2604
},
{
"epoch": 2.1936026936026938,
"grad_norm": 9.251832008361816,
"learning_rate": 5.335575550066987e-07,
"loss": 0.46439725160598755,
"step": 2606
},
{
"epoch": 2.1952861952861955,
"grad_norm": 5.436981201171875,
"learning_rate": 5.322603390634379e-07,
"loss": 0.895796000957489,
"step": 2608
},
{
"epoch": 2.196969696969697,
"grad_norm": 3.214667320251465,
"learning_rate": 5.3096507922219e-07,
"loss": 0.6566123962402344,
"step": 2610
},
{
"epoch": 2.1986531986531985,
"grad_norm": 41.99171447753906,
"learning_rate": 5.296717799456703e-07,
"loss": 0.32645493745803833,
"step": 2612
},
{
"epoch": 2.2003367003367003,
"grad_norm": 6.42157506942749,
"learning_rate": 5.283804456898393e-07,
"loss": 0.7071173191070557,
"step": 2614
},
{
"epoch": 2.202020202020202,
"grad_norm": 5.982941627502441,
"learning_rate": 5.270910809038866e-07,
"loss": 0.5429423451423645,
"step": 2616
},
{
"epoch": 2.2037037037037037,
"grad_norm": 23.397838592529297,
"learning_rate": 5.258036900302162e-07,
"loss": 0.4608469009399414,
"step": 2618
},
{
"epoch": 2.2053872053872055,
"grad_norm": 1.3942065238952637,
"learning_rate": 5.245182775044319e-07,
"loss": 0.24561887979507446,
"step": 2620
},
{
"epoch": 2.207070707070707,
"grad_norm": 3.1465113162994385,
"learning_rate": 5.2323484775532e-07,
"loss": 0.5467818975448608,
"step": 2622
},
{
"epoch": 2.208754208754209,
"grad_norm": 12.308442115783691,
"learning_rate": 5.219534052048364e-07,
"loss": 0.48555779457092285,
"step": 2624
},
{
"epoch": 2.2104377104377106,
"grad_norm": 6.089041709899902,
"learning_rate": 5.206739542680903e-07,
"loss": 0.4167608618736267,
"step": 2626
},
{
"epoch": 2.212121212121212,
"grad_norm": 7.500848293304443,
"learning_rate": 5.193964993533275e-07,
"loss": 0.5702179074287415,
"step": 2628
},
{
"epoch": 2.2138047138047137,
"grad_norm": 10.495234489440918,
"learning_rate": 5.181210448619185e-07,
"loss": 0.2557629644870758,
"step": 2630
},
{
"epoch": 2.2154882154882154,
"grad_norm": 2.5270442962646484,
"learning_rate": 5.168475951883405e-07,
"loss": 0.39183729887008667,
"step": 2632
},
{
"epoch": 2.217171717171717,
"grad_norm": 2.1306686401367188,
"learning_rate": 5.155761547201631e-07,
"loss": 0.06966563314199448,
"step": 2634
},
{
"epoch": 2.218855218855219,
"grad_norm": 4.132006645202637,
"learning_rate": 5.143067278380339e-07,
"loss": 0.7425806522369385,
"step": 2636
},
{
"epoch": 2.2205387205387206,
"grad_norm": 2.9199447631835938,
"learning_rate": 5.13039318915663e-07,
"loss": 1.07930326461792,
"step": 2638
},
{
"epoch": 2.2222222222222223,
"grad_norm": 2.4841439723968506,
"learning_rate": 5.117739323198067e-07,
"loss": 0.982938289642334,
"step": 2640
},
{
"epoch": 2.223905723905724,
"grad_norm": 4.3581013679504395,
"learning_rate": 5.105105724102547e-07,
"loss": 0.5647614002227783,
"step": 2642
},
{
"epoch": 2.225589225589226,
"grad_norm": 6.911370754241943,
"learning_rate": 5.092492435398137e-07,
"loss": 0.5829119086265564,
"step": 2644
},
{
"epoch": 2.227272727272727,
"grad_norm": 4.011280059814453,
"learning_rate": 5.079899500542917e-07,
"loss": 0.5897196531295776,
"step": 2646
},
{
"epoch": 2.228956228956229,
"grad_norm": 4.96337890625,
"learning_rate": 5.067326962924848e-07,
"loss": 0.2728573977947235,
"step": 2648
},
{
"epoch": 2.2306397306397305,
"grad_norm": 6.272621154785156,
"learning_rate": 5.054774865861617e-07,
"loss": 0.9227702617645264,
"step": 2650
},
{
"epoch": 2.2323232323232323,
"grad_norm": 4.739163875579834,
"learning_rate": 5.042243252600475e-07,
"loss": 0.5031465888023376,
"step": 2652
},
{
"epoch": 2.234006734006734,
"grad_norm": 13.35574722290039,
"learning_rate": 5.029732166318106e-07,
"loss": 0.49748843908309937,
"step": 2654
},
{
"epoch": 2.2356902356902357,
"grad_norm": 4.151340484619141,
"learning_rate": 5.017241650120462e-07,
"loss": 0.585181713104248,
"step": 2656
},
{
"epoch": 2.2373737373737375,
"grad_norm": 17.889524459838867,
"learning_rate": 5.004771747042631e-07,
"loss": 0.7983870506286621,
"step": 2658
},
{
"epoch": 2.239057239057239,
"grad_norm": 6.143094539642334,
"learning_rate": 4.992322500048673e-07,
"loss": 0.6713172197341919,
"step": 2660
},
{
"epoch": 2.240740740740741,
"grad_norm": 3.4442899227142334,
"learning_rate": 4.979893952031483e-07,
"loss": 0.7296475768089294,
"step": 2662
},
{
"epoch": 2.242424242424242,
"grad_norm": 16.668384552001953,
"learning_rate": 4.96748614581264e-07,
"loss": 0.3102848529815674,
"step": 2664
},
{
"epoch": 2.244107744107744,
"grad_norm": 2.3950233459472656,
"learning_rate": 4.955099124142251e-07,
"loss": 0.712740421295166,
"step": 2666
},
{
"epoch": 2.2457912457912457,
"grad_norm": 4.428253650665283,
"learning_rate": 4.942732929698827e-07,
"loss": 0.5821852684020996,
"step": 2668
},
{
"epoch": 2.2474747474747474,
"grad_norm": 8.776701927185059,
"learning_rate": 4.930387605089104e-07,
"loss": 0.4474225640296936,
"step": 2670
},
{
"epoch": 2.249158249158249,
"grad_norm": 3.6381278038024902,
"learning_rate": 4.918063192847921e-07,
"loss": 0.33651861548423767,
"step": 2672
},
{
"epoch": 2.250841750841751,
"grad_norm": 4.837399482727051,
"learning_rate": 4.905759735438068e-07,
"loss": 0.5961496829986572,
"step": 2674
},
{
"epoch": 2.2525252525252526,
"grad_norm": 2.985142946243286,
"learning_rate": 4.893477275250127e-07,
"loss": 0.6518359184265137,
"step": 2676
},
{
"epoch": 2.2542087542087543,
"grad_norm": 7.1583943367004395,
"learning_rate": 4.881215854602342e-07,
"loss": 0.4896303117275238,
"step": 2678
},
{
"epoch": 2.255892255892256,
"grad_norm": 1.9810396432876587,
"learning_rate": 4.868975515740471e-07,
"loss": 0.8590680956840515,
"step": 2680
},
{
"epoch": 2.257575757575758,
"grad_norm": 7.562203884124756,
"learning_rate": 4.856756300837625e-07,
"loss": 0.18953704833984375,
"step": 2682
},
{
"epoch": 2.259259259259259,
"grad_norm": 7.8364481925964355,
"learning_rate": 4.844558251994146e-07,
"loss": 0.12749773263931274,
"step": 2684
},
{
"epoch": 2.260942760942761,
"grad_norm": 3.5520970821380615,
"learning_rate": 4.832381411237444e-07,
"loss": 0.6111665964126587,
"step": 2686
},
{
"epoch": 2.2626262626262625,
"grad_norm": 4.207799911499023,
"learning_rate": 4.820225820521855e-07,
"loss": 0.36922651529312134,
"step": 2688
},
{
"epoch": 2.2643097643097643,
"grad_norm": 1.94363534450531,
"learning_rate": 4.808091521728506e-07,
"loss": 0.9025669097900391,
"step": 2690
},
{
"epoch": 2.265993265993266,
"grad_norm": 14.200057029724121,
"learning_rate": 4.795978556665165e-07,
"loss": 0.8429475426673889,
"step": 2692
},
{
"epoch": 2.2676767676767677,
"grad_norm": 3.5672523975372314,
"learning_rate": 4.783886967066088e-07,
"loss": 0.6566574573516846,
"step": 2694
},
{
"epoch": 2.2693602693602695,
"grad_norm": 4.338009357452393,
"learning_rate": 4.77181679459189e-07,
"loss": 0.5327779054641724,
"step": 2696
},
{
"epoch": 2.271043771043771,
"grad_norm": 2.5908162593841553,
"learning_rate": 4.759768080829399e-07,
"loss": 0.624381959438324,
"step": 2698
},
{
"epoch": 2.2727272727272725,
"grad_norm": 6.710553169250488,
"learning_rate": 4.747740867291497e-07,
"loss": 0.7681624889373779,
"step": 2700
},
{
"epoch": 2.274410774410774,
"grad_norm": 2.840843915939331,
"learning_rate": 4.7357351954169973e-07,
"loss": 0.49092429876327515,
"step": 2702
},
{
"epoch": 2.276094276094276,
"grad_norm": 2.1035234928131104,
"learning_rate": 4.7237511065704933e-07,
"loss": 0.8667645454406738,
"step": 2704
},
{
"epoch": 2.2777777777777777,
"grad_norm": 3.245436429977417,
"learning_rate": 4.7117886420422094e-07,
"loss": 0.9094717502593994,
"step": 2706
},
{
"epoch": 2.2794612794612794,
"grad_norm": 2.4817285537719727,
"learning_rate": 4.6998478430478714e-07,
"loss": 0.351574569940567,
"step": 2708
},
{
"epoch": 2.281144781144781,
"grad_norm": 5.749747276306152,
"learning_rate": 4.6879287507285596e-07,
"loss": 0.5877597332000732,
"step": 2710
},
{
"epoch": 2.282828282828283,
"grad_norm": 9.687824249267578,
"learning_rate": 4.676031406150555e-07,
"loss": 0.5526677370071411,
"step": 2712
},
{
"epoch": 2.2845117845117846,
"grad_norm": 3.64471435546875,
"learning_rate": 4.66415585030522e-07,
"loss": 0.4332752227783203,
"step": 2714
},
{
"epoch": 2.2861952861952863,
"grad_norm": 7.181333065032959,
"learning_rate": 4.6523021241088416e-07,
"loss": 0.7148293256759644,
"step": 2716
},
{
"epoch": 2.287878787878788,
"grad_norm": 4.991126537322998,
"learning_rate": 4.6404702684024905e-07,
"loss": 0.5515605807304382,
"step": 2718
},
{
"epoch": 2.28956228956229,
"grad_norm": 10.846860885620117,
"learning_rate": 4.628660323951891e-07,
"loss": 0.5390480160713196,
"step": 2720
},
{
"epoch": 2.291245791245791,
"grad_norm": 3.9083449840545654,
"learning_rate": 4.616872331447272e-07,
"loss": 0.63498854637146,
"step": 2722
},
{
"epoch": 2.292929292929293,
"grad_norm": 6.314955234527588,
"learning_rate": 4.605106331503223e-07,
"loss": 0.6880998611450195,
"step": 2724
},
{
"epoch": 2.2946127946127945,
"grad_norm": 3.322652816772461,
"learning_rate": 4.5933623646585683e-07,
"loss": 0.6316101551055908,
"step": 2726
},
{
"epoch": 2.2962962962962963,
"grad_norm": 5.35445499420166,
"learning_rate": 4.581640471376215e-07,
"loss": 0.5416774749755859,
"step": 2728
},
{
"epoch": 2.297979797979798,
"grad_norm": 6.625260353088379,
"learning_rate": 4.5699406920430155e-07,
"loss": 0.972043514251709,
"step": 2730
},
{
"epoch": 2.2996632996632997,
"grad_norm": 3.9685635566711426,
"learning_rate": 4.5582630669696324e-07,
"loss": 0.5268035531044006,
"step": 2732
},
{
"epoch": 2.3013468013468015,
"grad_norm": 9.009088516235352,
"learning_rate": 4.5466076363904e-07,
"loss": 0.4689450263977051,
"step": 2734
},
{
"epoch": 2.303030303030303,
"grad_norm": 6.697409629821777,
"learning_rate": 4.5349744404631785e-07,
"loss": 0.43555888533592224,
"step": 2736
},
{
"epoch": 2.3047138047138045,
"grad_norm": 9.158797264099121,
"learning_rate": 4.5233635192692206e-07,
"loss": 0.5540938377380371,
"step": 2738
},
{
"epoch": 2.3063973063973062,
"grad_norm": 18.85773468017578,
"learning_rate": 4.511774912813043e-07,
"loss": 0.4014560580253601,
"step": 2740
},
{
"epoch": 2.308080808080808,
"grad_norm": 1.82210111618042,
"learning_rate": 4.5002086610222626e-07,
"loss": 0.7727656364440918,
"step": 2742
},
{
"epoch": 2.3097643097643097,
"grad_norm": 3.7924273014068604,
"learning_rate": 4.488664803747487e-07,
"loss": 0.7189053297042847,
"step": 2744
},
{
"epoch": 2.3114478114478114,
"grad_norm": 3.0608716011047363,
"learning_rate": 4.4771433807621644e-07,
"loss": 0.7668474912643433,
"step": 2746
},
{
"epoch": 2.313131313131313,
"grad_norm": 5.792914867401123,
"learning_rate": 4.4656444317624397e-07,
"loss": 0.6078014373779297,
"step": 2748
},
{
"epoch": 2.314814814814815,
"grad_norm": 1.747604250907898,
"learning_rate": 4.454167996367032e-07,
"loss": 0.10793264210224152,
"step": 2750
},
{
"epoch": 2.3164983164983166,
"grad_norm": 4.28343391418457,
"learning_rate": 4.442714114117092e-07,
"loss": 0.33263859152793884,
"step": 2752
},
{
"epoch": 2.3181818181818183,
"grad_norm": 2.2499372959136963,
"learning_rate": 4.4312828244760613e-07,
"loss": 0.39961159229278564,
"step": 2754
},
{
"epoch": 2.31986531986532,
"grad_norm": 3.355552911758423,
"learning_rate": 4.4198741668295425e-07,
"loss": 0.8770014047622681,
"step": 2756
},
{
"epoch": 2.3215488215488214,
"grad_norm": 2.2010586261749268,
"learning_rate": 4.4084881804851644e-07,
"loss": 0.5539072751998901,
"step": 2758
},
{
"epoch": 2.323232323232323,
"grad_norm": 4.903811931610107,
"learning_rate": 4.397124904672437e-07,
"loss": 0.6975724697113037,
"step": 2760
},
{
"epoch": 2.324915824915825,
"grad_norm": 5.035953044891357,
"learning_rate": 4.3857843785426263e-07,
"loss": 0.5050334334373474,
"step": 2762
},
{
"epoch": 2.3265993265993266,
"grad_norm": 3.3227932453155518,
"learning_rate": 4.374466641168622e-07,
"loss": 0.8777497410774231,
"step": 2764
},
{
"epoch": 2.3282828282828283,
"grad_norm": 4.905037879943848,
"learning_rate": 4.363171731544786e-07,
"loss": 0.7257252931594849,
"step": 2766
},
{
"epoch": 2.32996632996633,
"grad_norm": 2.3318030834198,
"learning_rate": 4.351899688586834e-07,
"loss": 0.5315639972686768,
"step": 2768
},
{
"epoch": 2.3316498316498318,
"grad_norm": 12.677505493164062,
"learning_rate": 4.3406505511317025e-07,
"loss": 0.6226543188095093,
"step": 2770
},
{
"epoch": 2.3333333333333335,
"grad_norm": 3.6738951206207275,
"learning_rate": 4.329424357937397e-07,
"loss": 0.5986767411231995,
"step": 2772
},
{
"epoch": 2.3350168350168348,
"grad_norm": 3.570671558380127,
"learning_rate": 4.318221147682879e-07,
"loss": 0.693830132484436,
"step": 2774
},
{
"epoch": 2.3367003367003365,
"grad_norm": 3.0889062881469727,
"learning_rate": 4.307040958967924e-07,
"loss": 0.6411426663398743,
"step": 2776
},
{
"epoch": 2.3383838383838382,
"grad_norm": 4.422166347503662,
"learning_rate": 4.2958838303129817e-07,
"loss": 0.45083481073379517,
"step": 2778
},
{
"epoch": 2.34006734006734,
"grad_norm": 29.303316116333008,
"learning_rate": 4.2847498001590573e-07,
"loss": 0.6881177425384521,
"step": 2780
},
{
"epoch": 2.3417508417508417,
"grad_norm": 2.217395544052124,
"learning_rate": 4.273638906867573e-07,
"loss": 0.5657017230987549,
"step": 2782
},
{
"epoch": 2.3434343434343434,
"grad_norm": 10.195280075073242,
"learning_rate": 4.2625511887202225e-07,
"loss": 0.7839221954345703,
"step": 2784
},
{
"epoch": 2.345117845117845,
"grad_norm": 2.6481029987335205,
"learning_rate": 4.2514866839188657e-07,
"loss": 0.5463940501213074,
"step": 2786
},
{
"epoch": 2.346801346801347,
"grad_norm": 2.2342593669891357,
"learning_rate": 4.2404454305853796e-07,
"loss": 0.8763151168823242,
"step": 2788
},
{
"epoch": 2.3484848484848486,
"grad_norm": 4.609320640563965,
"learning_rate": 4.229427466761522e-07,
"loss": 0.7232416868209839,
"step": 2790
},
{
"epoch": 2.3501683501683504,
"grad_norm": 6.990656852722168,
"learning_rate": 4.2184328304088164e-07,
"loss": 0.5656273365020752,
"step": 2792
},
{
"epoch": 2.351851851851852,
"grad_norm": 10.642841339111328,
"learning_rate": 4.2074615594084146e-07,
"loss": 0.6187400817871094,
"step": 2794
},
{
"epoch": 2.3535353535353534,
"grad_norm": 3.1630921363830566,
"learning_rate": 4.1965136915609543e-07,
"loss": 0.9885926246643066,
"step": 2796
},
{
"epoch": 2.355218855218855,
"grad_norm": 13.756888389587402,
"learning_rate": 4.1855892645864513e-07,
"loss": 0.45941799879074097,
"step": 2798
},
{
"epoch": 2.356902356902357,
"grad_norm": 2.228693962097168,
"learning_rate": 4.1746883161241555e-07,
"loss": 0.9851700067520142,
"step": 2800
},
{
"epoch": 2.3585858585858586,
"grad_norm": 2.863492965698242,
"learning_rate": 4.1638108837324137e-07,
"loss": 0.9169178009033203,
"step": 2802
},
{
"epoch": 2.3602693602693603,
"grad_norm": 3.3131117820739746,
"learning_rate": 4.152957004888563e-07,
"loss": 0.7946122884750366,
"step": 2804
},
{
"epoch": 2.361952861952862,
"grad_norm": 6.783644676208496,
"learning_rate": 4.142126716988784e-07,
"loss": 0.7735965847969055,
"step": 2806
},
{
"epoch": 2.3636363636363638,
"grad_norm": 3.6407532691955566,
"learning_rate": 4.131320057347969e-07,
"loss": 0.802727460861206,
"step": 2808
},
{
"epoch": 2.3653198653198655,
"grad_norm": 3.4392080307006836,
"learning_rate": 4.120537063199612e-07,
"loss": 1.0042896270751953,
"step": 2810
},
{
"epoch": 2.3670033670033668,
"grad_norm": 15.25992202758789,
"learning_rate": 4.109777771695663e-07,
"loss": 0.7024844288825989,
"step": 2812
},
{
"epoch": 2.3686868686868685,
"grad_norm": 2.76926589012146,
"learning_rate": 4.0990422199064103e-07,
"loss": 0.6036837100982666,
"step": 2814
},
{
"epoch": 2.3703703703703702,
"grad_norm": 4.845790386199951,
"learning_rate": 4.0883304448203477e-07,
"loss": 0.484286904335022,
"step": 2816
},
{
"epoch": 2.372053872053872,
"grad_norm": 3.267883777618408,
"learning_rate": 4.077642483344044e-07,
"loss": 0.5557587146759033,
"step": 2818
},
{
"epoch": 2.3737373737373737,
"grad_norm": 5.12905216217041,
"learning_rate": 4.066978372302025e-07,
"loss": 0.6941782236099243,
"step": 2820
},
{
"epoch": 2.3754208754208754,
"grad_norm": 3.630934953689575,
"learning_rate": 4.056338148436643e-07,
"loss": 0.4251060485839844,
"step": 2822
},
{
"epoch": 2.377104377104377,
"grad_norm": 5.501477241516113,
"learning_rate": 4.0457218484079414e-07,
"loss": 0.9760651588439941,
"step": 2824
},
{
"epoch": 2.378787878787879,
"grad_norm": 3.194762945175171,
"learning_rate": 4.035129508793542e-07,
"loss": 0.8394796848297119,
"step": 2826
},
{
"epoch": 2.3804713804713806,
"grad_norm": 689.3011474609375,
"learning_rate": 4.024561166088516e-07,
"loss": 0.4385402798652649,
"step": 2828
},
{
"epoch": 2.3821548821548824,
"grad_norm": 8.300933837890625,
"learning_rate": 4.0140168567052447e-07,
"loss": 0.932929277420044,
"step": 2830
},
{
"epoch": 2.3838383838383836,
"grad_norm": 20.601125717163086,
"learning_rate": 4.003496616973312e-07,
"loss": 0.6770232915878296,
"step": 2832
},
{
"epoch": 2.3855218855218854,
"grad_norm": 7.719077110290527,
"learning_rate": 3.9930004831393757e-07,
"loss": 0.5193581581115723,
"step": 2834
},
{
"epoch": 2.387205387205387,
"grad_norm": 3.433854341506958,
"learning_rate": 3.982528491367025e-07,
"loss": 0.5733506679534912,
"step": 2836
},
{
"epoch": 2.388888888888889,
"grad_norm": 5.136038780212402,
"learning_rate": 3.9720806777366817e-07,
"loss": 0.47218313813209534,
"step": 2838
},
{
"epoch": 2.3905723905723906,
"grad_norm": 1.433040976524353,
"learning_rate": 3.961657078245462e-07,
"loss": 0.8041648864746094,
"step": 2840
},
{
"epoch": 2.3922558922558923,
"grad_norm": 0.9403243660926819,
"learning_rate": 3.9512577288070487e-07,
"loss": 0.3452025055885315,
"step": 2842
},
{
"epoch": 2.393939393939394,
"grad_norm": 2.0302951335906982,
"learning_rate": 3.940882665251576e-07,
"loss": 0.9638313055038452,
"step": 2844
},
{
"epoch": 2.3956228956228958,
"grad_norm": 2.591130495071411,
"learning_rate": 3.930531923325506e-07,
"loss": 0.7442007064819336,
"step": 2846
},
{
"epoch": 2.3973063973063975,
"grad_norm": 4.4280548095703125,
"learning_rate": 3.920205538691497e-07,
"loss": 0.953087329864502,
"step": 2848
},
{
"epoch": 2.398989898989899,
"grad_norm": 2.4256279468536377,
"learning_rate": 3.9099035469282906e-07,
"loss": 0.7336077094078064,
"step": 2850
},
{
"epoch": 2.4006734006734005,
"grad_norm": 8.586638450622559,
"learning_rate": 3.8996259835305835e-07,
"loss": 0.390910804271698,
"step": 2852
},
{
"epoch": 2.4023569023569022,
"grad_norm": 32.83812713623047,
"learning_rate": 3.8893728839089035e-07,
"loss": 0.609326958656311,
"step": 2854
},
{
"epoch": 2.404040404040404,
"grad_norm": 4.8817458152771,
"learning_rate": 3.879144283389495e-07,
"loss": 0.5054650902748108,
"step": 2856
},
{
"epoch": 2.4057239057239057,
"grad_norm": 6.203306198120117,
"learning_rate": 3.8689402172141915e-07,
"loss": 0.6514500975608826,
"step": 2858
},
{
"epoch": 2.4074074074074074,
"grad_norm": 5.882429122924805,
"learning_rate": 3.8587607205402916e-07,
"loss": 0.41622331738471985,
"step": 2860
},
{
"epoch": 2.409090909090909,
"grad_norm": 2.390727996826172,
"learning_rate": 3.848605828440444e-07,
"loss": 0.7136590480804443,
"step": 2862
},
{
"epoch": 2.410774410774411,
"grad_norm": 6.754751682281494,
"learning_rate": 3.8384755759025313e-07,
"loss": 0.4541894793510437,
"step": 2864
},
{
"epoch": 2.4124579124579126,
"grad_norm": 3.0260815620422363,
"learning_rate": 3.828369997829528e-07,
"loss": 0.6994350552558899,
"step": 2866
},
{
"epoch": 2.4141414141414144,
"grad_norm": 2.372957706451416,
"learning_rate": 3.818289129039405e-07,
"loss": 0.8106458187103271,
"step": 2868
},
{
"epoch": 2.4158249158249157,
"grad_norm": 2.801581621170044,
"learning_rate": 3.808233004264997e-07,
"loss": 0.5665256977081299,
"step": 2870
},
{
"epoch": 2.4175084175084174,
"grad_norm": 3.397507905960083,
"learning_rate": 3.79820165815389e-07,
"loss": 0.44936102628707886,
"step": 2872
},
{
"epoch": 2.419191919191919,
"grad_norm": 2.3020706176757812,
"learning_rate": 3.788195125268284e-07,
"loss": 0.8391485214233398,
"step": 2874
},
{
"epoch": 2.420875420875421,
"grad_norm": 3.2758114337921143,
"learning_rate": 3.7782134400848995e-07,
"loss": 0.7489950656890869,
"step": 2876
},
{
"epoch": 2.4225589225589226,
"grad_norm": 5.947027206420898,
"learning_rate": 3.768256636994843e-07,
"loss": 0.4590849280357361,
"step": 2878
},
{
"epoch": 2.4242424242424243,
"grad_norm": 2.203789234161377,
"learning_rate": 3.7583247503034864e-07,
"loss": 0.7745201587677002,
"step": 2880
},
{
"epoch": 2.425925925925926,
"grad_norm": 3.3688504695892334,
"learning_rate": 3.7484178142303625e-07,
"loss": 0.5334046483039856,
"step": 2882
},
{
"epoch": 2.4276094276094278,
"grad_norm": 6.785653114318848,
"learning_rate": 3.738535862909031e-07,
"loss": 0.5028021335601807,
"step": 2884
},
{
"epoch": 2.429292929292929,
"grad_norm": 2.8243677616119385,
"learning_rate": 3.7286789303869735e-07,
"loss": 0.5118685960769653,
"step": 2886
},
{
"epoch": 2.430976430976431,
"grad_norm": 9.112323760986328,
"learning_rate": 3.7188470506254744e-07,
"loss": 0.5720535516738892,
"step": 2888
},
{
"epoch": 2.4326599326599325,
"grad_norm": 2.4455068111419678,
"learning_rate": 3.7090402574994885e-07,
"loss": 0.5391176342964172,
"step": 2890
},
{
"epoch": 2.4343434343434343,
"grad_norm": 5.355926990509033,
"learning_rate": 3.699258584797548e-07,
"loss": 0.6294881105422974,
"step": 2892
},
{
"epoch": 2.436026936026936,
"grad_norm": 2.457951545715332,
"learning_rate": 3.6895020662216326e-07,
"loss": 0.9022385478019714,
"step": 2894
},
{
"epoch": 2.4377104377104377,
"grad_norm": 7.03529167175293,
"learning_rate": 3.679770735387052e-07,
"loss": 0.720146656036377,
"step": 2896
},
{
"epoch": 2.4393939393939394,
"grad_norm": 10.114142417907715,
"learning_rate": 3.6700646258223343e-07,
"loss": 0.6195645332336426,
"step": 2898
},
{
"epoch": 2.441077441077441,
"grad_norm": 5.667145729064941,
"learning_rate": 3.6603837709691153e-07,
"loss": 0.43182432651519775,
"step": 2900
},
{
"epoch": 2.442760942760943,
"grad_norm": 13.144913673400879,
"learning_rate": 3.6507282041820085e-07,
"loss": 0.7789742350578308,
"step": 2902
},
{
"epoch": 2.4444444444444446,
"grad_norm": 9.248213768005371,
"learning_rate": 3.641097958728506e-07,
"loss": 0.48242291808128357,
"step": 2904
},
{
"epoch": 2.4461279461279464,
"grad_norm": 2.1247684955596924,
"learning_rate": 3.631493067788858e-07,
"loss": 0.3829724192619324,
"step": 2906
},
{
"epoch": 2.4478114478114477,
"grad_norm": 5.711479663848877,
"learning_rate": 3.6219135644559506e-07,
"loss": 0.5261117815971375,
"step": 2908
},
{
"epoch": 2.4494949494949494,
"grad_norm": 9.852108001708984,
"learning_rate": 3.6123594817352046e-07,
"loss": 0.6702965497970581,
"step": 2910
},
{
"epoch": 2.451178451178451,
"grad_norm": 6.790271282196045,
"learning_rate": 3.602830852544458e-07,
"loss": 0.4730827212333679,
"step": 2912
},
{
"epoch": 2.452861952861953,
"grad_norm": 8.912752151489258,
"learning_rate": 3.593327709713844e-07,
"loss": 0.7823283076286316,
"step": 2914
},
{
"epoch": 2.4545454545454546,
"grad_norm": 4.171782970428467,
"learning_rate": 3.5838500859856893e-07,
"loss": 0.6686667203903198,
"step": 2916
},
{
"epoch": 2.4562289562289563,
"grad_norm": 2.5204222202301025,
"learning_rate": 3.5743980140143975e-07,
"loss": 0.3113139867782593,
"step": 2918
},
{
"epoch": 2.457912457912458,
"grad_norm": 3.9417402744293213,
"learning_rate": 3.5649715263663297e-07,
"loss": 0.7965060472488403,
"step": 2920
},
{
"epoch": 2.45959595959596,
"grad_norm": 98.92294311523438,
"learning_rate": 3.5555706555197043e-07,
"loss": 0.43743637204170227,
"step": 2922
},
{
"epoch": 2.461279461279461,
"grad_norm": 3.686532974243164,
"learning_rate": 3.5461954338644795e-07,
"loss": 0.30664563179016113,
"step": 2924
},
{
"epoch": 2.462962962962963,
"grad_norm": 2.410140037536621,
"learning_rate": 3.536845893702234e-07,
"loss": 0.5530849695205688,
"step": 2926
},
{
"epoch": 2.4646464646464645,
"grad_norm": 24.317949295043945,
"learning_rate": 3.527522067246068e-07,
"loss": 0.5903668403625488,
"step": 2928
},
{
"epoch": 2.4663299663299663,
"grad_norm": 3.0360710620880127,
"learning_rate": 3.518223986620491e-07,
"loss": 0.24971121549606323,
"step": 2930
},
{
"epoch": 2.468013468013468,
"grad_norm": 5.305819511413574,
"learning_rate": 3.5089516838612986e-07,
"loss": 0.654639482498169,
"step": 2932
},
{
"epoch": 2.4696969696969697,
"grad_norm": 6.428488254547119,
"learning_rate": 3.499705190915476e-07,
"loss": 0.6544331312179565,
"step": 2934
},
{
"epoch": 2.4713804713804715,
"grad_norm": 5.150181293487549,
"learning_rate": 3.4904845396410854e-07,
"loss": 0.4527553915977478,
"step": 2936
},
{
"epoch": 2.473063973063973,
"grad_norm": 9.783395767211914,
"learning_rate": 3.4812897618071445e-07,
"loss": 0.5435815453529358,
"step": 2938
},
{
"epoch": 2.474747474747475,
"grad_norm": 5.587001800537109,
"learning_rate": 3.472120889093536e-07,
"loss": 0.4773102402687073,
"step": 2940
},
{
"epoch": 2.4764309764309766,
"grad_norm": 4.579451084136963,
"learning_rate": 3.462977953090884e-07,
"loss": 0.40418028831481934,
"step": 2942
},
{
"epoch": 2.478114478114478,
"grad_norm": 8.405234336853027,
"learning_rate": 3.453860985300446e-07,
"loss": 0.43912988901138306,
"step": 2944
},
{
"epoch": 2.4797979797979797,
"grad_norm": 2.54058837890625,
"learning_rate": 3.4447700171340164e-07,
"loss": 0.9208707213401794,
"step": 2946
},
{
"epoch": 2.4814814814814814,
"grad_norm": 2.506683588027954,
"learning_rate": 3.4357050799138053e-07,
"loss": 0.9445154666900635,
"step": 2948
},
{
"epoch": 2.483164983164983,
"grad_norm": 2.4092612266540527,
"learning_rate": 3.4266662048723337e-07,
"loss": 0.9850308895111084,
"step": 2950
},
{
"epoch": 2.484848484848485,
"grad_norm": 10.964947700500488,
"learning_rate": 3.417653423152329e-07,
"loss": 0.8890873193740845,
"step": 2952
},
{
"epoch": 2.4865319865319866,
"grad_norm": 3.6544744968414307,
"learning_rate": 3.4086667658066186e-07,
"loss": 0.5936705470085144,
"step": 2954
},
{
"epoch": 2.4882154882154883,
"grad_norm": 6.769886016845703,
"learning_rate": 3.3997062637980167e-07,
"loss": 0.8404591083526611,
"step": 2956
},
{
"epoch": 2.48989898989899,
"grad_norm": 6.549720764160156,
"learning_rate": 3.390771947999224e-07,
"loss": 0.5225011110305786,
"step": 2958
},
{
"epoch": 2.4915824915824913,
"grad_norm": 3.255201816558838,
"learning_rate": 3.381863849192718e-07,
"loss": 0.8342874050140381,
"step": 2960
},
{
"epoch": 2.493265993265993,
"grad_norm": 4.254117488861084,
"learning_rate": 3.3729819980706444e-07,
"loss": 0.5838370323181152,
"step": 2962
},
{
"epoch": 2.494949494949495,
"grad_norm": 2.933912992477417,
"learning_rate": 3.364126425234719e-07,
"loss": 0.7112206220626831,
"step": 2964
},
{
"epoch": 2.4966329966329965,
"grad_norm": 5.019345760345459,
"learning_rate": 3.3552971611961187e-07,
"loss": 0.5937138199806213,
"step": 2966
},
{
"epoch": 2.4983164983164983,
"grad_norm": 3.7426111698150635,
"learning_rate": 3.34649423637537e-07,
"loss": 0.81259685754776,
"step": 2968
},
{
"epoch": 2.5,
"grad_norm": 14.945383071899414,
"learning_rate": 3.337717681102253e-07,
"loss": 0.8419524431228638,
"step": 2970
},
{
"epoch": 2.5016835016835017,
"grad_norm": 3.5432753562927246,
"learning_rate": 3.328967525615697e-07,
"loss": 0.36146029829978943,
"step": 2972
},
{
"epoch": 2.5033670033670035,
"grad_norm": 31.251523971557617,
"learning_rate": 3.3202438000636634e-07,
"loss": 0.5271892547607422,
"step": 2974
},
{
"epoch": 2.505050505050505,
"grad_norm": 4.31404447555542,
"learning_rate": 3.311546534503061e-07,
"loss": 0.6813575029373169,
"step": 2976
},
{
"epoch": 2.506734006734007,
"grad_norm": 10.586312294006348,
"learning_rate": 3.3028757588996303e-07,
"loss": 0.3660055994987488,
"step": 2978
},
{
"epoch": 2.5084175084175087,
"grad_norm": 3.4156813621520996,
"learning_rate": 3.294231503127839e-07,
"loss": 0.7575110197067261,
"step": 2980
},
{
"epoch": 2.51010101010101,
"grad_norm": 8.647886276245117,
"learning_rate": 3.2856137969707847e-07,
"loss": 0.788750171661377,
"step": 2982
},
{
"epoch": 2.5117845117845117,
"grad_norm": 3.4446113109588623,
"learning_rate": 3.277022670120095e-07,
"loss": 0.4518158435821533,
"step": 2984
},
{
"epoch": 2.5134680134680134,
"grad_norm": 15.611486434936523,
"learning_rate": 3.268458152175813e-07,
"loss": 0.7932558059692383,
"step": 2986
},
{
"epoch": 2.515151515151515,
"grad_norm": 27.114980697631836,
"learning_rate": 3.2599202726463084e-07,
"loss": 0.61873459815979,
"step": 2988
},
{
"epoch": 2.516835016835017,
"grad_norm": 2.904008626937866,
"learning_rate": 3.2514090609481683e-07,
"loss": 0.10597741603851318,
"step": 2990
},
{
"epoch": 2.5185185185185186,
"grad_norm": 4.048925399780273,
"learning_rate": 3.2429245464060965e-07,
"loss": 0.8708055019378662,
"step": 2992
},
{
"epoch": 2.5202020202020203,
"grad_norm": 8.804458618164062,
"learning_rate": 3.234466758252818e-07,
"loss": 0.5630843043327332,
"step": 2994
},
{
"epoch": 2.5218855218855216,
"grad_norm": 2.408494234085083,
"learning_rate": 3.2260357256289715e-07,
"loss": 0.6830452084541321,
"step": 2996
},
{
"epoch": 2.5235690235690234,
"grad_norm": 4.321279525756836,
"learning_rate": 3.217631477583009e-07,
"loss": 0.5143815875053406,
"step": 2998
},
{
"epoch": 2.525252525252525,
"grad_norm": 1.794520378112793,
"learning_rate": 3.2092540430711044e-07,
"loss": 0.5180540084838867,
"step": 3000
},
{
"epoch": 2.526936026936027,
"grad_norm": 3.5048828125,
"learning_rate": 3.200903450957044e-07,
"loss": 0.49375149607658386,
"step": 3002
},
{
"epoch": 2.5286195286195285,
"grad_norm": 3.251695156097412,
"learning_rate": 3.192579730012129e-07,
"loss": 0.9845426082611084,
"step": 3004
},
{
"epoch": 2.5303030303030303,
"grad_norm": 6.4302263259887695,
"learning_rate": 3.184282908915081e-07,
"loss": 0.7751657962799072,
"step": 3006
},
{
"epoch": 2.531986531986532,
"grad_norm": 2.9614450931549072,
"learning_rate": 3.1760130162519427e-07,
"loss": 0.6437252759933472,
"step": 3008
},
{
"epoch": 2.5336700336700337,
"grad_norm": 3.641021728515625,
"learning_rate": 3.16777008051597e-07,
"loss": 0.33099907636642456,
"step": 3010
},
{
"epoch": 2.5353535353535355,
"grad_norm": 6.20613431930542,
"learning_rate": 3.159554130107546e-07,
"loss": 0.7693390846252441,
"step": 3012
},
{
"epoch": 2.537037037037037,
"grad_norm": 2.9264049530029297,
"learning_rate": 3.1513651933340797e-07,
"loss": 0.6058576107025146,
"step": 3014
},
{
"epoch": 2.538720538720539,
"grad_norm": 4.105390548706055,
"learning_rate": 3.143203298409899e-07,
"loss": 0.5138027667999268,
"step": 3016
},
{
"epoch": 2.5404040404040407,
"grad_norm": 13.755269050598145,
"learning_rate": 3.1350684734561676e-07,
"loss": 0.8655276298522949,
"step": 3018
},
{
"epoch": 2.542087542087542,
"grad_norm": 2.1755192279815674,
"learning_rate": 3.126960746500784e-07,
"loss": 0.7289071083068848,
"step": 3020
},
{
"epoch": 2.5437710437710437,
"grad_norm": 12.643874168395996,
"learning_rate": 3.118880145478274e-07,
"loss": 0.8041051030158997,
"step": 3022
},
{
"epoch": 2.5454545454545454,
"grad_norm": 3.0522072315216064,
"learning_rate": 3.110826698229711e-07,
"loss": 0.978661835193634,
"step": 3024
},
{
"epoch": 2.547138047138047,
"grad_norm": 10.360844612121582,
"learning_rate": 3.102800432502607e-07,
"loss": 0.2467118501663208,
"step": 3026
},
{
"epoch": 2.548821548821549,
"grad_norm": 4.895616054534912,
"learning_rate": 3.0948013759508274e-07,
"loss": 0.522205114364624,
"step": 3028
},
{
"epoch": 2.5505050505050506,
"grad_norm": 8.892946243286133,
"learning_rate": 3.0868295561344874e-07,
"loss": 0.4860239624977112,
"step": 3030
},
{
"epoch": 2.5521885521885523,
"grad_norm": 2.0342283248901367,
"learning_rate": 3.078885000519858e-07,
"loss": 0.4318680763244629,
"step": 3032
},
{
"epoch": 2.5538720538720536,
"grad_norm": 3.473409414291382,
"learning_rate": 3.0709677364792767e-07,
"loss": 0.8540394306182861,
"step": 3034
},
{
"epoch": 2.5555555555555554,
"grad_norm": 10.30406665802002,
"learning_rate": 3.0630777912910533e-07,
"loss": 0.9184716939926147,
"step": 3036
},
{
"epoch": 2.557239057239057,
"grad_norm": 6.738753795623779,
"learning_rate": 3.0552151921393633e-07,
"loss": 0.6098148822784424,
"step": 3038
},
{
"epoch": 2.558922558922559,
"grad_norm": 2.9204185009002686,
"learning_rate": 3.0473799661141707e-07,
"loss": 0.9494307041168213,
"step": 3040
},
{
"epoch": 2.5606060606060606,
"grad_norm": 5.460939407348633,
"learning_rate": 3.0395721402111286e-07,
"loss": 0.6524157524108887,
"step": 3042
},
{
"epoch": 2.5622895622895623,
"grad_norm": 4.9505109786987305,
"learning_rate": 3.031791741331478e-07,
"loss": 0.8453473448753357,
"step": 3044
},
{
"epoch": 2.563973063973064,
"grad_norm": 12.800024032592773,
"learning_rate": 3.0240387962819695e-07,
"loss": 0.6964143514633179,
"step": 3046
},
{
"epoch": 2.5656565656565657,
"grad_norm": 2.980398654937744,
"learning_rate": 3.016313331774762e-07,
"loss": 0.8597656488418579,
"step": 3048
},
{
"epoch": 2.5673400673400675,
"grad_norm": 5.009873867034912,
"learning_rate": 3.008615374427329e-07,
"loss": 0.3663683533668518,
"step": 3050
},
{
"epoch": 2.569023569023569,
"grad_norm": 3.2331385612487793,
"learning_rate": 3.000944950762373e-07,
"loss": 0.9516968131065369,
"step": 3052
},
{
"epoch": 2.570707070707071,
"grad_norm": 3.4293010234832764,
"learning_rate": 2.993302087207732e-07,
"loss": 0.07853099703788757,
"step": 3054
},
{
"epoch": 2.5723905723905722,
"grad_norm": 7.385575771331787,
"learning_rate": 2.985686810096285e-07,
"loss": 0.5600473284721375,
"step": 3056
},
{
"epoch": 2.574074074074074,
"grad_norm": 6.306962490081787,
"learning_rate": 2.978099145665867e-07,
"loss": 0.3351885974407196,
"step": 3058
},
{
"epoch": 2.5757575757575757,
"grad_norm": 4.720430850982666,
"learning_rate": 2.970539120059174e-07,
"loss": 0.6371778249740601,
"step": 3060
},
{
"epoch": 2.5774410774410774,
"grad_norm": 6.102284908294678,
"learning_rate": 2.963006759323676e-07,
"loss": 0.5941987037658691,
"step": 3062
},
{
"epoch": 2.579124579124579,
"grad_norm": 5.050604820251465,
"learning_rate": 2.955502089411523e-07,
"loss": 0.424297571182251,
"step": 3064
},
{
"epoch": 2.580808080808081,
"grad_norm": 5.28799295425415,
"learning_rate": 2.9480251361794656e-07,
"loss": 0.5996015667915344,
"step": 3066
},
{
"epoch": 2.5824915824915826,
"grad_norm": 9.331116676330566,
"learning_rate": 2.940575925388746e-07,
"loss": 0.3746086657047272,
"step": 3068
},
{
"epoch": 2.584175084175084,
"grad_norm": 13.008201599121094,
"learning_rate": 2.933154482705035e-07,
"loss": 0.17353637516498566,
"step": 3070
},
{
"epoch": 2.5858585858585856,
"grad_norm": 5.598928928375244,
"learning_rate": 2.925760833698327e-07,
"loss": 0.43435174226760864,
"step": 3072
},
{
"epoch": 2.5875420875420874,
"grad_norm": 4.106137752532959,
"learning_rate": 2.9183950038428475e-07,
"loss": 0.8951042890548706,
"step": 3074
},
{
"epoch": 2.589225589225589,
"grad_norm": 7.533908843994141,
"learning_rate": 2.9110570185169834e-07,
"loss": 0.35531511902809143,
"step": 3076
},
{
"epoch": 2.590909090909091,
"grad_norm": 2.466156482696533,
"learning_rate": 2.903746903003184e-07,
"loss": 0.8299113512039185,
"step": 3078
},
{
"epoch": 2.5925925925925926,
"grad_norm": 4.047122478485107,
"learning_rate": 2.896464682487866e-07,
"loss": 0.6478674411773682,
"step": 3080
},
{
"epoch": 2.5942760942760943,
"grad_norm": 2.4090776443481445,
"learning_rate": 2.8892103820613487e-07,
"loss": 0.9649114012718201,
"step": 3082
},
{
"epoch": 2.595959595959596,
"grad_norm": 3.08392071723938,
"learning_rate": 2.88198402671775e-07,
"loss": 0.5619069337844849,
"step": 3084
},
{
"epoch": 2.5976430976430978,
"grad_norm": 3.889181137084961,
"learning_rate": 2.874785641354901e-07,
"loss": 0.5941061973571777,
"step": 3086
},
{
"epoch": 2.5993265993265995,
"grad_norm": 4.151243209838867,
"learning_rate": 2.867615250774269e-07,
"loss": 0.7975903153419495,
"step": 3088
},
{
"epoch": 2.601010101010101,
"grad_norm": 6.307215690612793,
"learning_rate": 2.860472879680869e-07,
"loss": 0.8723431825637817,
"step": 3090
},
{
"epoch": 2.602693602693603,
"grad_norm": 4.979188442230225,
"learning_rate": 2.8533585526831726e-07,
"loss": 0.6906735897064209,
"step": 3092
},
{
"epoch": 2.6043771043771042,
"grad_norm": 5.310150623321533,
"learning_rate": 2.8462722942930286e-07,
"loss": 0.5048916339874268,
"step": 3094
},
{
"epoch": 2.606060606060606,
"grad_norm": 5.775015830993652,
"learning_rate": 2.8392141289255806e-07,
"loss": 0.660202145576477,
"step": 3096
},
{
"epoch": 2.6077441077441077,
"grad_norm": 12.841134071350098,
"learning_rate": 2.8321840808991775e-07,
"loss": 0.5634772777557373,
"step": 3098
},
{
"epoch": 2.6094276094276094,
"grad_norm": 6.739739418029785,
"learning_rate": 2.8251821744352933e-07,
"loss": 0.5956814289093018,
"step": 3100
},
{
"epoch": 2.611111111111111,
"grad_norm": 2.563978433609009,
"learning_rate": 2.8182084336584423e-07,
"loss": 0.5830974578857422,
"step": 3102
},
{
"epoch": 2.612794612794613,
"grad_norm": 4.95272970199585,
"learning_rate": 2.8112628825960926e-07,
"loss": 0.8090439438819885,
"step": 3104
},
{
"epoch": 2.6144781144781146,
"grad_norm": 3.6197354793548584,
"learning_rate": 2.804345545178594e-07,
"loss": 0.7719713449478149,
"step": 3106
},
{
"epoch": 2.616161616161616,
"grad_norm": 12.875308990478516,
"learning_rate": 2.7974564452390833e-07,
"loss": 0.18324008584022522,
"step": 3108
},
{
"epoch": 2.6178451178451176,
"grad_norm": 3.717010498046875,
"learning_rate": 2.790595606513406e-07,
"loss": 0.7723451852798462,
"step": 3110
},
{
"epoch": 2.6195286195286194,
"grad_norm": 2.814573287963867,
"learning_rate": 2.78376305264004e-07,
"loss": 0.39754652976989746,
"step": 3112
},
{
"epoch": 2.621212121212121,
"grad_norm": 3.2848994731903076,
"learning_rate": 2.776958807160011e-07,
"loss": 0.4727073609828949,
"step": 3114
},
{
"epoch": 2.622895622895623,
"grad_norm": 3.7905068397521973,
"learning_rate": 2.7701828935168026e-07,
"loss": 0.8447589874267578,
"step": 3116
},
{
"epoch": 2.6245791245791246,
"grad_norm": 2.8799266815185547,
"learning_rate": 2.763435335056291e-07,
"loss": 1.0325953960418701,
"step": 3118
},
{
"epoch": 2.6262626262626263,
"grad_norm": 3.1782491207122803,
"learning_rate": 2.756716155026656e-07,
"loss": 0.5554063320159912,
"step": 3120
},
{
"epoch": 2.627946127946128,
"grad_norm": 2.897000551223755,
"learning_rate": 2.750025376578295e-07,
"loss": 0.9207072854042053,
"step": 3122
},
{
"epoch": 2.6296296296296298,
"grad_norm": 2.4364206790924072,
"learning_rate": 2.743363022763758e-07,
"loss": 0.8367090225219727,
"step": 3124
},
{
"epoch": 2.6313131313131315,
"grad_norm": 4.580779075622559,
"learning_rate": 2.7367291165376593e-07,
"loss": 0.6048181056976318,
"step": 3126
},
{
"epoch": 2.6329966329966332,
"grad_norm": 6.332035064697266,
"learning_rate": 2.7301236807565925e-07,
"loss": 0.808570384979248,
"step": 3128
},
{
"epoch": 2.634680134680135,
"grad_norm": 7.100130081176758,
"learning_rate": 2.7235467381790654e-07,
"loss": 0.49354591965675354,
"step": 3130
},
{
"epoch": 2.6363636363636362,
"grad_norm": 2.4457104206085205,
"learning_rate": 2.716998311465415e-07,
"loss": 0.2983268201351166,
"step": 3132
},
{
"epoch": 2.638047138047138,
"grad_norm": 4.332514762878418,
"learning_rate": 2.710478423177722e-07,
"loss": 0.8370668888092041,
"step": 3134
},
{
"epoch": 2.6397306397306397,
"grad_norm": 4.5044684410095215,
"learning_rate": 2.7039870957797464e-07,
"loss": 0.7652538418769836,
"step": 3136
},
{
"epoch": 2.6414141414141414,
"grad_norm": 4.017055511474609,
"learning_rate": 2.697524351636844e-07,
"loss": 0.4114927649497986,
"step": 3138
},
{
"epoch": 2.643097643097643,
"grad_norm": 3.3894689083099365,
"learning_rate": 2.691090213015886e-07,
"loss": 0.8686310052871704,
"step": 3140
},
{
"epoch": 2.644781144781145,
"grad_norm": 2.7027831077575684,
"learning_rate": 2.6846847020851884e-07,
"loss": 0.5540004372596741,
"step": 3142
},
{
"epoch": 2.6464646464646466,
"grad_norm": 3.608794927597046,
"learning_rate": 2.678307840914431e-07,
"loss": 0.8333272933959961,
"step": 3144
},
{
"epoch": 2.648148148148148,
"grad_norm": 3.318763494491577,
"learning_rate": 2.6719596514745826e-07,
"loss": 0.9629621505737305,
"step": 3146
},
{
"epoch": 2.6498316498316496,
"grad_norm": 3.6985297203063965,
"learning_rate": 2.665640155637828e-07,
"loss": 0.5129526853561401,
"step": 3148
},
{
"epoch": 2.6515151515151514,
"grad_norm": 2.535443067550659,
"learning_rate": 2.659349375177489e-07,
"loss": 0.8636926412582397,
"step": 3150
},
{
"epoch": 2.653198653198653,
"grad_norm": 2.768599510192871,
"learning_rate": 2.6530873317679515e-07,
"loss": 0.20498168468475342,
"step": 3152
},
{
"epoch": 2.654882154882155,
"grad_norm": 34.86625671386719,
"learning_rate": 2.6468540469845895e-07,
"loss": 0.9441362619400024,
"step": 3154
},
{
"epoch": 2.6565656565656566,
"grad_norm": 9.064558982849121,
"learning_rate": 2.640649542303693e-07,
"loss": 0.5518494844436646,
"step": 3156
},
{
"epoch": 2.6582491582491583,
"grad_norm": 3.18203067779541,
"learning_rate": 2.634473839102389e-07,
"loss": 0.35931962728500366,
"step": 3158
},
{
"epoch": 2.65993265993266,
"grad_norm": 7.7922282218933105,
"learning_rate": 2.6283269586585737e-07,
"loss": 0.44168537855148315,
"step": 3160
},
{
"epoch": 2.6616161616161618,
"grad_norm": 4.682225227355957,
"learning_rate": 2.6222089221508404e-07,
"loss": 0.6104831695556641,
"step": 3162
},
{
"epoch": 2.6632996632996635,
"grad_norm": 2.9735536575317383,
"learning_rate": 2.6161197506583944e-07,
"loss": 0.8378016352653503,
"step": 3164
},
{
"epoch": 2.6649831649831652,
"grad_norm": 6.616426467895508,
"learning_rate": 2.610059465160995e-07,
"loss": 0.6439419984817505,
"step": 3166
},
{
"epoch": 2.6666666666666665,
"grad_norm": 3.3657751083374023,
"learning_rate": 2.6040280865388773e-07,
"loss": 0.7727220058441162,
"step": 3168
},
{
"epoch": 2.6683501683501682,
"grad_norm": 3.285837173461914,
"learning_rate": 2.5980256355726744e-07,
"loss": 0.6320611834526062,
"step": 3170
},
{
"epoch": 2.67003367003367,
"grad_norm": 4.853776931762695,
"learning_rate": 2.5920521329433606e-07,
"loss": 1.043792963027954,
"step": 3172
},
{
"epoch": 2.6717171717171717,
"grad_norm": 2.360769271850586,
"learning_rate": 2.586107599232164e-07,
"loss": 0.9384379386901855,
"step": 3174
},
{
"epoch": 2.6734006734006734,
"grad_norm": 14.25788402557373,
"learning_rate": 2.5801920549205023e-07,
"loss": 0.4818713068962097,
"step": 3176
},
{
"epoch": 2.675084175084175,
"grad_norm": 2.0616092681884766,
"learning_rate": 2.5743055203899167e-07,
"loss": 0.9861509799957275,
"step": 3178
},
{
"epoch": 2.676767676767677,
"grad_norm": 4.687266826629639,
"learning_rate": 2.568448015921996e-07,
"loss": 0.6932214498519897,
"step": 3180
},
{
"epoch": 2.678451178451178,
"grad_norm": 2.3194851875305176,
"learning_rate": 2.562619561698306e-07,
"loss": 0.7709292769432068,
"step": 3182
},
{
"epoch": 2.68013468013468,
"grad_norm": 2.256274461746216,
"learning_rate": 2.556820177800324e-07,
"loss": 0.8786018490791321,
"step": 3184
},
{
"epoch": 2.6818181818181817,
"grad_norm": 1.7933223247528076,
"learning_rate": 2.551049884209371e-07,
"loss": 0.7843552827835083,
"step": 3186
},
{
"epoch": 2.6835016835016834,
"grad_norm": 3.6488430500030518,
"learning_rate": 2.5453087008065307e-07,
"loss": 0.7388215661048889,
"step": 3188
},
{
"epoch": 2.685185185185185,
"grad_norm": 1.1536720991134644,
"learning_rate": 2.5395966473725994e-07,
"loss": 0.552982747554779,
"step": 3190
},
{
"epoch": 2.686868686868687,
"grad_norm": 3.049055814743042,
"learning_rate": 2.5339137435880043e-07,
"loss": 0.617717981338501,
"step": 3192
},
{
"epoch": 2.6885521885521886,
"grad_norm": 2.4993679523468018,
"learning_rate": 2.5282600090327383e-07,
"loss": 0.7265998125076294,
"step": 3194
},
{
"epoch": 2.6902356902356903,
"grad_norm": 12.052529335021973,
"learning_rate": 2.5226354631862966e-07,
"loss": 0.6202006340026855,
"step": 3196
},
{
"epoch": 2.691919191919192,
"grad_norm": 2.131632089614868,
"learning_rate": 2.517040125427608e-07,
"loss": 0.741972804069519,
"step": 3198
},
{
"epoch": 2.6936026936026938,
"grad_norm": 2.2996838092803955,
"learning_rate": 2.511474015034964e-07,
"loss": 0.8759193420410156,
"step": 3200
},
{
"epoch": 2.6952861952861955,
"grad_norm": 6.061952590942383,
"learning_rate": 2.5059371511859557e-07,
"loss": 0.6976549625396729,
"step": 3202
},
{
"epoch": 2.6969696969696972,
"grad_norm": 3.891650915145874,
"learning_rate": 2.50042955295741e-07,
"loss": 0.6694223880767822,
"step": 3204
},
{
"epoch": 2.6986531986531985,
"grad_norm": 5.893383026123047,
"learning_rate": 2.494951239325321e-07,
"loss": 0.7830284833908081,
"step": 3206
},
{
"epoch": 2.7003367003367003,
"grad_norm": 4.715972423553467,
"learning_rate": 2.489502229164781e-07,
"loss": 0.5429476499557495,
"step": 3208
},
{
"epoch": 2.702020202020202,
"grad_norm": 3.343920946121216,
"learning_rate": 2.4840825412499274e-07,
"loss": 0.8423386812210083,
"step": 3210
},
{
"epoch": 2.7037037037037037,
"grad_norm": 2.458588123321533,
"learning_rate": 2.478692194253861e-07,
"loss": 0.4965520203113556,
"step": 3212
},
{
"epoch": 2.7053872053872055,
"grad_norm": 2.6822140216827393,
"learning_rate": 2.473331206748597e-07,
"loss": 0.6127833127975464,
"step": 3214
},
{
"epoch": 2.707070707070707,
"grad_norm": 3.830547571182251,
"learning_rate": 2.467999597204996e-07,
"loss": 0.2938854694366455,
"step": 3216
},
{
"epoch": 2.708754208754209,
"grad_norm": 3.668973684310913,
"learning_rate": 2.462697383992691e-07,
"loss": 0.7545672655105591,
"step": 3218
},
{
"epoch": 2.71043771043771,
"grad_norm": 4.789590358734131,
"learning_rate": 2.457424585380041e-07,
"loss": 0.3368055820465088,
"step": 3220
},
{
"epoch": 2.712121212121212,
"grad_norm": 4.607179641723633,
"learning_rate": 2.4521812195340544e-07,
"loss": 0.7228003144264221,
"step": 3222
},
{
"epoch": 2.7138047138047137,
"grad_norm": 3.7761380672454834,
"learning_rate": 2.4469673045203333e-07,
"loss": 0.39306753873825073,
"step": 3224
},
{
"epoch": 2.7154882154882154,
"grad_norm": 3.8872487545013428,
"learning_rate": 2.441782858303007e-07,
"loss": 0.388794481754303,
"step": 3226
},
{
"epoch": 2.717171717171717,
"grad_norm": 3.936227560043335,
"learning_rate": 2.436627898744678e-07,
"loss": 0.7990210056304932,
"step": 3228
},
{
"epoch": 2.718855218855219,
"grad_norm": 10.530872344970703,
"learning_rate": 2.4315024436063464e-07,
"loss": 0.3864361643791199,
"step": 3230
},
{
"epoch": 2.7205387205387206,
"grad_norm": 8.344436645507812,
"learning_rate": 2.4264065105473637e-07,
"loss": 0.8147022724151611,
"step": 3232
},
{
"epoch": 2.7222222222222223,
"grad_norm": 1.8948400020599365,
"learning_rate": 2.4213401171253656e-07,
"loss": 0.6463346481323242,
"step": 3234
},
{
"epoch": 2.723905723905724,
"grad_norm": 2.3045897483825684,
"learning_rate": 2.416303280796206e-07,
"loss": 0.7769128084182739,
"step": 3236
},
{
"epoch": 2.725589225589226,
"grad_norm": 10.252862930297852,
"learning_rate": 2.411296018913907e-07,
"loss": 0.7157000303268433,
"step": 3238
},
{
"epoch": 2.7272727272727275,
"grad_norm": 12.489968299865723,
"learning_rate": 2.406318348730592e-07,
"loss": 0.7306414842605591,
"step": 3240
},
{
"epoch": 2.728956228956229,
"grad_norm": 3.00982666015625,
"learning_rate": 2.401370287396428e-07,
"loss": 0.8304033279418945,
"step": 3242
},
{
"epoch": 2.7306397306397305,
"grad_norm": 4.058210849761963,
"learning_rate": 2.396451851959571e-07,
"loss": 0.5530973672866821,
"step": 3244
},
{
"epoch": 2.7323232323232323,
"grad_norm": 4.974558353424072,
"learning_rate": 2.391563059366099e-07,
"loss": 0.7806906700134277,
"step": 3246
},
{
"epoch": 2.734006734006734,
"grad_norm": 10.766674995422363,
"learning_rate": 2.3867039264599587e-07,
"loss": 0.805009126663208,
"step": 3248
},
{
"epoch": 2.7356902356902357,
"grad_norm": 4.717216491699219,
"learning_rate": 2.3818744699829105e-07,
"loss": 0.6719311475753784,
"step": 3250
},
{
"epoch": 2.7373737373737375,
"grad_norm": 4.689093112945557,
"learning_rate": 2.3770747065744594e-07,
"loss": 0.37460649013519287,
"step": 3252
},
{
"epoch": 2.739057239057239,
"grad_norm": 3.905974864959717,
"learning_rate": 2.3723046527718137e-07,
"loss": 0.528462290763855,
"step": 3254
},
{
"epoch": 2.7407407407407405,
"grad_norm": 3.8697361946105957,
"learning_rate": 2.367564325009815e-07,
"loss": 0.4876176714897156,
"step": 3256
},
{
"epoch": 2.742424242424242,
"grad_norm": 2.9344778060913086,
"learning_rate": 2.362853739620885e-07,
"loss": 0.6226130723953247,
"step": 3258
},
{
"epoch": 2.744107744107744,
"grad_norm": 4.8839497566223145,
"learning_rate": 2.3581729128349745e-07,
"loss": 0.4137502908706665,
"step": 3260
},
{
"epoch": 2.7457912457912457,
"grad_norm": 2.9513931274414062,
"learning_rate": 2.3535218607795013e-07,
"loss": 0.6418605446815491,
"step": 3262
},
{
"epoch": 2.7474747474747474,
"grad_norm": 3.3043465614318848,
"learning_rate": 2.3489005994792948e-07,
"loss": 0.857982337474823,
"step": 3264
},
{
"epoch": 2.749158249158249,
"grad_norm": 5.111167907714844,
"learning_rate": 2.3443091448565454e-07,
"loss": 0.958759605884552,
"step": 3266
},
{
"epoch": 2.750841750841751,
"grad_norm": 74.0482406616211,
"learning_rate": 2.339747512730749e-07,
"loss": 0.4375573396682739,
"step": 3268
},
{
"epoch": 2.7525252525252526,
"grad_norm": 3.2530107498168945,
"learning_rate": 2.3352157188186424e-07,
"loss": 0.9555472135543823,
"step": 3270
},
{
"epoch": 2.7542087542087543,
"grad_norm": 44.573936462402344,
"learning_rate": 2.3307137787341667e-07,
"loss": 0.5092712044715881,
"step": 3272
},
{
"epoch": 2.755892255892256,
"grad_norm": 2.355350971221924,
"learning_rate": 2.3262417079883986e-07,
"loss": 0.7026905417442322,
"step": 3274
},
{
"epoch": 2.757575757575758,
"grad_norm": 13.51882553100586,
"learning_rate": 2.3217995219895016e-07,
"loss": 0.3385421633720398,
"step": 3276
},
{
"epoch": 2.7592592592592595,
"grad_norm": 1.3492799997329712,
"learning_rate": 2.317387236042678e-07,
"loss": 0.03149527311325073,
"step": 3278
},
{
"epoch": 2.760942760942761,
"grad_norm": 24.129674911499023,
"learning_rate": 2.313004865350109e-07,
"loss": 1.0571789741516113,
"step": 3280
},
{
"epoch": 2.7626262626262625,
"grad_norm": 2.9202077388763428,
"learning_rate": 2.3086524250109045e-07,
"loss": 1.0254530906677246,
"step": 3282
},
{
"epoch": 2.7643097643097643,
"grad_norm": 10.319761276245117,
"learning_rate": 2.3043299300210528e-07,
"loss": 0.2718232274055481,
"step": 3284
},
{
"epoch": 2.765993265993266,
"grad_norm": 4.364471435546875,
"learning_rate": 2.30003739527337e-07,
"loss": 0.7651864290237427,
"step": 3286
},
{
"epoch": 2.7676767676767677,
"grad_norm": 5.035273551940918,
"learning_rate": 2.2957748355574408e-07,
"loss": 0.7020351886749268,
"step": 3288
},
{
"epoch": 2.7693602693602695,
"grad_norm": 11.138975143432617,
"learning_rate": 2.2915422655595795e-07,
"loss": 0.20551855862140656,
"step": 3290
},
{
"epoch": 2.771043771043771,
"grad_norm": 5.818138599395752,
"learning_rate": 2.287339699862771e-07,
"loss": 0.9749652147293091,
"step": 3292
},
{
"epoch": 2.7727272727272725,
"grad_norm": 5.8484063148498535,
"learning_rate": 2.2831671529466205e-07,
"loss": 0.7997506260871887,
"step": 3294
},
{
"epoch": 2.774410774410774,
"grad_norm": 3.476667642593384,
"learning_rate": 2.2790246391873086e-07,
"loss": 0.8032985925674438,
"step": 3296
},
{
"epoch": 2.776094276094276,
"grad_norm": 4.120417594909668,
"learning_rate": 2.2749121728575393e-07,
"loss": 0.23050040006637573,
"step": 3298
},
{
"epoch": 2.7777777777777777,
"grad_norm": 3.6002514362335205,
"learning_rate": 2.2708297681264874e-07,
"loss": 0.45907649397850037,
"step": 3300
},
{
"epoch": 2.7794612794612794,
"grad_norm": 2.618075370788574,
"learning_rate": 2.2667774390597562e-07,
"loss": 0.4696184992790222,
"step": 3302
},
{
"epoch": 2.781144781144781,
"grad_norm": 6.530674457550049,
"learning_rate": 2.2627551996193247e-07,
"loss": 0.47576916217803955,
"step": 3304
},
{
"epoch": 2.782828282828283,
"grad_norm": 18.45606231689453,
"learning_rate": 2.2587630636634985e-07,
"loss": 0.6657184362411499,
"step": 3306
},
{
"epoch": 2.7845117845117846,
"grad_norm": 11.66965389251709,
"learning_rate": 2.2548010449468676e-07,
"loss": 0.48266786336898804,
"step": 3308
},
{
"epoch": 2.7861952861952863,
"grad_norm": 2.84804368019104,
"learning_rate": 2.2508691571202528e-07,
"loss": 0.6634323596954346,
"step": 3310
},
{
"epoch": 2.787878787878788,
"grad_norm": 3.701871395111084,
"learning_rate": 2.2469674137306627e-07,
"loss": 0.4185872972011566,
"step": 3312
},
{
"epoch": 2.78956228956229,
"grad_norm": 2.2695560455322266,
"learning_rate": 2.2430958282212414e-07,
"loss": 0.6932981014251709,
"step": 3314
},
{
"epoch": 2.791245791245791,
"grad_norm": 3.9276177883148193,
"learning_rate": 2.239254413931236e-07,
"loss": 0.9720036387443542,
"step": 3316
},
{
"epoch": 2.792929292929293,
"grad_norm": 3.183957099914551,
"learning_rate": 2.2354431840959307e-07,
"loss": 0.7453635334968567,
"step": 3318
},
{
"epoch": 2.7946127946127945,
"grad_norm": 4.194116115570068,
"learning_rate": 2.2316621518466167e-07,
"loss": 0.3255777359008789,
"step": 3320
},
{
"epoch": 2.7962962962962963,
"grad_norm": 5.5670366287231445,
"learning_rate": 2.227911330210542e-07,
"loss": 0.6090131998062134,
"step": 3322
},
{
"epoch": 2.797979797979798,
"grad_norm": 2.372026205062866,
"learning_rate": 2.2241907321108638e-07,
"loss": 0.6710550785064697,
"step": 3324
},
{
"epoch": 2.7996632996632997,
"grad_norm": 3.636491060256958,
"learning_rate": 2.22050037036661e-07,
"loss": 0.30255502462387085,
"step": 3326
},
{
"epoch": 2.8013468013468015,
"grad_norm": 3.7633321285247803,
"learning_rate": 2.216840257692628e-07,
"loss": 0.723252534866333,
"step": 3328
},
{
"epoch": 2.8030303030303028,
"grad_norm": 2.568369150161743,
"learning_rate": 2.213210406699547e-07,
"loss": 0.78731769323349,
"step": 3330
},
{
"epoch": 2.8047138047138045,
"grad_norm": 3.9559519290924072,
"learning_rate": 2.209610829893729e-07,
"loss": 0.5705679655075073,
"step": 3332
},
{
"epoch": 2.8063973063973062,
"grad_norm": 5.107378005981445,
"learning_rate": 2.2060415396772337e-07,
"loss": 0.4503876864910126,
"step": 3334
},
{
"epoch": 2.808080808080808,
"grad_norm": 3.7301788330078125,
"learning_rate": 2.2025025483477654e-07,
"loss": 0.5614144802093506,
"step": 3336
},
{
"epoch": 2.8097643097643097,
"grad_norm": 3.425426959991455,
"learning_rate": 2.1989938680986382e-07,
"loss": 0.27632904052734375,
"step": 3338
},
{
"epoch": 2.8114478114478114,
"grad_norm": 11.55947208404541,
"learning_rate": 2.1955155110187344e-07,
"loss": 0.6297179460525513,
"step": 3340
},
{
"epoch": 2.813131313131313,
"grad_norm": 5.041746139526367,
"learning_rate": 2.1920674890924545e-07,
"loss": 0.7801995873451233,
"step": 3342
},
{
"epoch": 2.814814814814815,
"grad_norm": 1.9846611022949219,
"learning_rate": 2.1886498141996858e-07,
"loss": 0.3154934346675873,
"step": 3344
},
{
"epoch": 2.8164983164983166,
"grad_norm": 3.4041101932525635,
"learning_rate": 2.185262498115759e-07,
"loss": 0.7565585374832153,
"step": 3346
},
{
"epoch": 2.8181818181818183,
"grad_norm": 5.533918380737305,
"learning_rate": 2.1819055525113995e-07,
"loss": 0.5513463020324707,
"step": 3348
},
{
"epoch": 2.81986531986532,
"grad_norm": 3.816920042037964,
"learning_rate": 2.178578988952698e-07,
"loss": 0.8172674179077148,
"step": 3350
},
{
"epoch": 2.821548821548822,
"grad_norm": 4.7206573486328125,
"learning_rate": 2.1752828189010677e-07,
"loss": 0.7926508188247681,
"step": 3352
},
{
"epoch": 2.823232323232323,
"grad_norm": 2.8711562156677246,
"learning_rate": 2.1720170537132003e-07,
"loss": 0.7785905599594116,
"step": 3354
},
{
"epoch": 2.824915824915825,
"grad_norm": 7.083092212677002,
"learning_rate": 2.16878170464103e-07,
"loss": 0.8117780685424805,
"step": 3356
},
{
"epoch": 2.8265993265993266,
"grad_norm": 6.3713178634643555,
"learning_rate": 2.1655767828316967e-07,
"loss": 0.4899190068244934,
"step": 3358
},
{
"epoch": 2.8282828282828283,
"grad_norm": 8.093062400817871,
"learning_rate": 2.1624022993275042e-07,
"loss": 0.481950581073761,
"step": 3360
},
{
"epoch": 2.82996632996633,
"grad_norm": 3.7031800746917725,
"learning_rate": 2.1592582650658838e-07,
"loss": 0.6889939308166504,
"step": 3362
},
{
"epoch": 2.8316498316498318,
"grad_norm": 8.515325546264648,
"learning_rate": 2.1561446908793575e-07,
"loss": 0.5986655950546265,
"step": 3364
},
{
"epoch": 2.8333333333333335,
"grad_norm": 2.5616695880889893,
"learning_rate": 2.1530615874954978e-07,
"loss": 0.4613681137561798,
"step": 3366
},
{
"epoch": 2.8350168350168348,
"grad_norm": 6.432313919067383,
"learning_rate": 2.1500089655368913e-07,
"loss": 0.35357874631881714,
"step": 3368
},
{
"epoch": 2.8367003367003365,
"grad_norm": 5.070071220397949,
"learning_rate": 2.146986835521108e-07,
"loss": 0.815057635307312,
"step": 3370
},
{
"epoch": 2.8383838383838382,
"grad_norm": 1.3125436305999756,
"learning_rate": 2.143995207860655e-07,
"loss": 0.6456162929534912,
"step": 3372
},
{
"epoch": 2.84006734006734,
"grad_norm": 50.76771545410156,
"learning_rate": 2.1410340928629483e-07,
"loss": 0.29310160875320435,
"step": 3374
},
{
"epoch": 2.8417508417508417,
"grad_norm": 2.078246831893921,
"learning_rate": 2.138103500730278e-07,
"loss": 0.851909875869751,
"step": 3376
},
{
"epoch": 2.8434343434343434,
"grad_norm": 2.2148220539093018,
"learning_rate": 2.1352034415597635e-07,
"loss": 0.7448092699050903,
"step": 3378
},
{
"epoch": 2.845117845117845,
"grad_norm": 2.512826919555664,
"learning_rate": 2.1323339253433309e-07,
"loss": 0.5352383255958557,
"step": 3380
},
{
"epoch": 2.846801346801347,
"grad_norm": 5.046896934509277,
"learning_rate": 2.1294949619676717e-07,
"loss": 0.522847056388855,
"step": 3382
},
{
"epoch": 2.8484848484848486,
"grad_norm": 4.314877033233643,
"learning_rate": 2.1266865612142064e-07,
"loss": 0.5352615118026733,
"step": 3384
},
{
"epoch": 2.8501683501683504,
"grad_norm": 3.3411834239959717,
"learning_rate": 2.1239087327590582e-07,
"loss": 0.7238250970840454,
"step": 3386
},
{
"epoch": 2.851851851851852,
"grad_norm": 0.83232581615448,
"learning_rate": 2.121161486173017e-07,
"loss": 0.6121417284011841,
"step": 3388
},
{
"epoch": 2.8535353535353534,
"grad_norm": 8.091914176940918,
"learning_rate": 2.1184448309215015e-07,
"loss": 0.4724659025669098,
"step": 3390
},
{
"epoch": 2.855218855218855,
"grad_norm": 3.3312911987304688,
"learning_rate": 2.1157587763645322e-07,
"loss": 0.5098093748092651,
"step": 3392
},
{
"epoch": 2.856902356902357,
"grad_norm": 5.780312538146973,
"learning_rate": 2.113103331756698e-07,
"loss": 0.9295372366905212,
"step": 3394
},
{
"epoch": 2.8585858585858586,
"grad_norm": 2.5686521530151367,
"learning_rate": 2.110478506247122e-07,
"loss": 0.9365147948265076,
"step": 3396
},
{
"epoch": 2.8602693602693603,
"grad_norm": 2.75380277633667,
"learning_rate": 2.1078843088794325e-07,
"loss": 0.4805770516395569,
"step": 3398
},
{
"epoch": 2.861952861952862,
"grad_norm": 14.623507499694824,
"learning_rate": 2.105320748591732e-07,
"loss": 0.38062724471092224,
"step": 3400
},
{
"epoch": 2.8636363636363638,
"grad_norm": 47.26361846923828,
"learning_rate": 2.1027878342165624e-07,
"loss": 0.4569489359855652,
"step": 3402
},
{
"epoch": 2.865319865319865,
"grad_norm": 2.116769313812256,
"learning_rate": 2.1002855744808815e-07,
"loss": 0.34320202469825745,
"step": 3404
},
{
"epoch": 2.8670033670033668,
"grad_norm": 4.610642910003662,
"learning_rate": 2.0978139780060257e-07,
"loss": 0.7092417478561401,
"step": 3406
},
{
"epoch": 2.8686868686868685,
"grad_norm": 4.693014144897461,
"learning_rate": 2.0953730533076862e-07,
"loss": 0.29190459847450256,
"step": 3408
},
{
"epoch": 2.8703703703703702,
"grad_norm": 3.3123207092285156,
"learning_rate": 2.0929628087958734e-07,
"loss": 0.7917627692222595,
"step": 3410
},
{
"epoch": 2.872053872053872,
"grad_norm": 1.7922461032867432,
"learning_rate": 2.0905832527748953e-07,
"loss": 0.43554821610450745,
"step": 3412
},
{
"epoch": 2.8737373737373737,
"grad_norm": 4.745511054992676,
"learning_rate": 2.0882343934433236e-07,
"loss": 0.5983174443244934,
"step": 3414
},
{
"epoch": 2.8754208754208754,
"grad_norm": 6.916215896606445,
"learning_rate": 2.085916238893966e-07,
"loss": 0.17676572501659393,
"step": 3416
},
{
"epoch": 2.877104377104377,
"grad_norm": 4.048447132110596,
"learning_rate": 2.0836287971138418e-07,
"loss": 0.6077107191085815,
"step": 3418
},
{
"epoch": 2.878787878787879,
"grad_norm": 2.5704290866851807,
"learning_rate": 2.0813720759841492e-07,
"loss": 0.4146248400211334,
"step": 3420
},
{
"epoch": 2.8804713804713806,
"grad_norm": 5.706145286560059,
"learning_rate": 2.0791460832802423e-07,
"loss": 0.7497705221176147,
"step": 3422
},
{
"epoch": 2.8821548821548824,
"grad_norm": 1.7757506370544434,
"learning_rate": 2.0769508266716027e-07,
"loss": 0.5505831241607666,
"step": 3424
},
{
"epoch": 2.883838383838384,
"grad_norm": 7.052734851837158,
"learning_rate": 2.0747863137218126e-07,
"loss": 0.6165893077850342,
"step": 3426
},
{
"epoch": 2.8855218855218854,
"grad_norm": 5.826257705688477,
"learning_rate": 2.0726525518885308e-07,
"loss": 0.5343178510665894,
"step": 3428
},
{
"epoch": 2.887205387205387,
"grad_norm": 8.041903495788574,
"learning_rate": 2.0705495485234653e-07,
"loss": 0.3310260772705078,
"step": 3430
},
{
"epoch": 2.888888888888889,
"grad_norm": 15.362848281860352,
"learning_rate": 2.0684773108723455e-07,
"loss": 0.5320956707000732,
"step": 3432
},
{
"epoch": 2.8905723905723906,
"grad_norm": 7.592126369476318,
"learning_rate": 2.0664358460749018e-07,
"loss": 0.29516857862472534,
"step": 3434
},
{
"epoch": 2.8922558922558923,
"grad_norm": 1.8380248546600342,
"learning_rate": 2.064425161164842e-07,
"loss": 0.9136509895324707,
"step": 3436
},
{
"epoch": 2.893939393939394,
"grad_norm": 4.288794994354248,
"learning_rate": 2.0624452630698195e-07,
"loss": 0.8272508382797241,
"step": 3438
},
{
"epoch": 2.8956228956228958,
"grad_norm": 3.879866600036621,
"learning_rate": 2.0604961586114163e-07,
"loss": 0.744123101234436,
"step": 3440
},
{
"epoch": 2.897306397306397,
"grad_norm": 3.288698196411133,
"learning_rate": 2.0585778545051195e-07,
"loss": 0.8894016742706299,
"step": 3442
},
{
"epoch": 2.898989898989899,
"grad_norm": 15.847039222717285,
"learning_rate": 2.0566903573602913e-07,
"loss": 0.2585524320602417,
"step": 3444
},
{
"epoch": 2.9006734006734005,
"grad_norm": 4.235921859741211,
"learning_rate": 2.0548336736801548e-07,
"loss": 0.5225664377212524,
"step": 3446
},
{
"epoch": 2.9023569023569022,
"grad_norm": 5.334314346313477,
"learning_rate": 2.0530078098617668e-07,
"loss": 1.000659704208374,
"step": 3448
},
{
"epoch": 2.904040404040404,
"grad_norm": 13.81791877746582,
"learning_rate": 2.0512127721959954e-07,
"loss": 0.2958747446537018,
"step": 3450
},
{
"epoch": 2.9057239057239057,
"grad_norm": 2.8504996299743652,
"learning_rate": 2.0494485668675003e-07,
"loss": 0.5946668386459351,
"step": 3452
},
{
"epoch": 2.9074074074074074,
"grad_norm": 30.945682525634766,
"learning_rate": 2.0477151999547137e-07,
"loss": 0.6222255229949951,
"step": 3454
},
{
"epoch": 2.909090909090909,
"grad_norm": 2.8661885261535645,
"learning_rate": 2.0460126774298115e-07,
"loss": 0.9090818166732788,
"step": 3456
},
{
"epoch": 2.910774410774411,
"grad_norm": 3.6362955570220947,
"learning_rate": 2.044341005158701e-07,
"loss": 0.6454827785491943,
"step": 3458
},
{
"epoch": 2.9124579124579126,
"grad_norm": 5.509945392608643,
"learning_rate": 2.042700188900996e-07,
"loss": 0.8902723789215088,
"step": 3460
},
{
"epoch": 2.9141414141414144,
"grad_norm": 4.623058795928955,
"learning_rate": 2.0410902343099998e-07,
"loss": 0.9835023283958435,
"step": 3462
},
{
"epoch": 2.915824915824916,
"grad_norm": 5.559566020965576,
"learning_rate": 2.039511146932683e-07,
"loss": 0.725146472454071,
"step": 3464
},
{
"epoch": 2.9175084175084174,
"grad_norm": 2.3381059169769287,
"learning_rate": 2.0379629322096658e-07,
"loss": 0.8742655515670776,
"step": 3466
},
{
"epoch": 2.919191919191919,
"grad_norm": 3.1581509113311768,
"learning_rate": 2.036445595475199e-07,
"loss": 0.5896962881088257,
"step": 3468
},
{
"epoch": 2.920875420875421,
"grad_norm": 2.895928382873535,
"learning_rate": 2.0349591419571473e-07,
"loss": 0.08913551270961761,
"step": 3470
},
{
"epoch": 2.9225589225589226,
"grad_norm": 3.939779758453369,
"learning_rate": 2.0335035767769674e-07,
"loss": 0.5938529968261719,
"step": 3472
},
{
"epoch": 2.9242424242424243,
"grad_norm": 2.6540651321411133,
"learning_rate": 2.032078904949694e-07,
"loss": 0.607816755771637,
"step": 3474
},
{
"epoch": 2.925925925925926,
"grad_norm": 11.374692916870117,
"learning_rate": 2.0306851313839217e-07,
"loss": 0.26831308007240295,
"step": 3476
},
{
"epoch": 2.9276094276094278,
"grad_norm": 4.051253318786621,
"learning_rate": 2.0293222608817862e-07,
"loss": 0.776150107383728,
"step": 3478
},
{
"epoch": 2.929292929292929,
"grad_norm": 6.790820121765137,
"learning_rate": 2.0279902981389491e-07,
"loss": 0.44397690892219543,
"step": 3480
},
{
"epoch": 2.930976430976431,
"grad_norm": 4.825781345367432,
"learning_rate": 2.026689247744584e-07,
"loss": 0.7775415182113647,
"step": 3482
},
{
"epoch": 2.9326599326599325,
"grad_norm": 3.1354546546936035,
"learning_rate": 2.0254191141813563e-07,
"loss": 0.5349434614181519,
"step": 3484
},
{
"epoch": 2.9343434343434343,
"grad_norm": 3.595128059387207,
"learning_rate": 2.0241799018254102e-07,
"loss": 0.6211014986038208,
"step": 3486
},
{
"epoch": 2.936026936026936,
"grad_norm": 4.181585311889648,
"learning_rate": 2.0229716149463543e-07,
"loss": 0.6584489345550537,
"step": 3488
},
{
"epoch": 2.9377104377104377,
"grad_norm": 5.394354343414307,
"learning_rate": 2.0217942577072447e-07,
"loss": 0.5959441661834717,
"step": 3490
},
{
"epoch": 2.9393939393939394,
"grad_norm": 13.857940673828125,
"learning_rate": 2.0206478341645734e-07,
"loss": 0.8532196283340454,
"step": 3492
},
{
"epoch": 2.941077441077441,
"grad_norm": 6.366513252258301,
"learning_rate": 2.0195323482682508e-07,
"loss": 0.3821958899497986,
"step": 3494
},
{
"epoch": 2.942760942760943,
"grad_norm": 2.0421321392059326,
"learning_rate": 2.0184478038615948e-07,
"loss": 0.7394722700119019,
"step": 3496
},
{
"epoch": 2.9444444444444446,
"grad_norm": 4.313158988952637,
"learning_rate": 2.0173942046813191e-07,
"loss": 0.2922773063182831,
"step": 3498
},
{
"epoch": 2.9461279461279464,
"grad_norm": 5.628312110900879,
"learning_rate": 2.016371554357515e-07,
"loss": 0.608026385307312,
"step": 3500
},
{
"epoch": 2.9478114478114477,
"grad_norm": 10.177474975585938,
"learning_rate": 2.015379856413643e-07,
"loss": 0.684483528137207,
"step": 3502
},
{
"epoch": 2.9494949494949494,
"grad_norm": 9.977062225341797,
"learning_rate": 2.01441911426652e-07,
"loss": 0.36152565479278564,
"step": 3504
},
{
"epoch": 2.951178451178451,
"grad_norm": 1.5593669414520264,
"learning_rate": 2.013489331226307e-07,
"loss": 0.6608873009681702,
"step": 3506
},
{
"epoch": 2.952861952861953,
"grad_norm": 3.423954486846924,
"learning_rate": 2.0125905104964978e-07,
"loss": 0.8101043701171875,
"step": 3508
},
{
"epoch": 2.9545454545454546,
"grad_norm": 4.263778209686279,
"learning_rate": 2.0117226551739068e-07,
"loss": 0.7046741247177124,
"step": 3510
},
{
"epoch": 2.9562289562289563,
"grad_norm": 3.3937125205993652,
"learning_rate": 2.0108857682486629e-07,
"loss": 0.7705718874931335,
"step": 3512
},
{
"epoch": 2.957912457912458,
"grad_norm": 10.03588581085205,
"learning_rate": 2.0100798526041927e-07,
"loss": 0.31763288378715515,
"step": 3514
},
{
"epoch": 2.9595959595959593,
"grad_norm": 3.6547443866729736,
"learning_rate": 2.009304911017215e-07,
"loss": 0.8195918202400208,
"step": 3516
},
{
"epoch": 2.961279461279461,
"grad_norm": 2.8320508003234863,
"learning_rate": 2.0085609461577295e-07,
"loss": 0.871679425239563,
"step": 3518
},
{
"epoch": 2.962962962962963,
"grad_norm": 5.754692554473877,
"learning_rate": 2.0078479605890064e-07,
"loss": 0.3950427770614624,
"step": 3520
},
{
"epoch": 2.9646464646464645,
"grad_norm": 3.0160629749298096,
"learning_rate": 2.007165956767584e-07,
"loss": 0.65765380859375,
"step": 3522
},
{
"epoch": 2.9663299663299663,
"grad_norm": 5.943231105804443,
"learning_rate": 2.00651493704325e-07,
"loss": 0.2477177381515503,
"step": 3524
},
{
"epoch": 2.968013468013468,
"grad_norm": 6.068716049194336,
"learning_rate": 2.0058949036590426e-07,
"loss": 0.8671658039093018,
"step": 3526
},
{
"epoch": 2.9696969696969697,
"grad_norm": 2.297165632247925,
"learning_rate": 2.0053058587512378e-07,
"loss": 0.7299938201904297,
"step": 3528
},
{
"epoch": 2.9713804713804715,
"grad_norm": 3.451326847076416,
"learning_rate": 2.0047478043493418e-07,
"loss": 0.7638918161392212,
"step": 3530
},
{
"epoch": 2.973063973063973,
"grad_norm": 5.721773147583008,
"learning_rate": 2.004220742376088e-07,
"loss": 0.6010457873344421,
"step": 3532
},
{
"epoch": 2.974747474747475,
"grad_norm": 11.908121109008789,
"learning_rate": 2.0037246746474277e-07,
"loss": 0.21666747331619263,
"step": 3534
},
{
"epoch": 2.9764309764309766,
"grad_norm": 2.7472894191741943,
"learning_rate": 2.0032596028725204e-07,
"loss": 0.828637421131134,
"step": 3536
},
{
"epoch": 2.9781144781144784,
"grad_norm": 7.899786949157715,
"learning_rate": 2.0028255286537355e-07,
"loss": 0.4242842197418213,
"step": 3538
},
{
"epoch": 2.9797979797979797,
"grad_norm": 2.2358016967773438,
"learning_rate": 2.0024224534866408e-07,
"loss": 0.9581695795059204,
"step": 3540
},
{
"epoch": 2.9814814814814814,
"grad_norm": 4.023903846740723,
"learning_rate": 2.0020503787599998e-07,
"loss": 0.8976711630821228,
"step": 3542
},
{
"epoch": 2.983164983164983,
"grad_norm": 5.354180812835693,
"learning_rate": 2.001709305755767e-07,
"loss": 0.47080734372138977,
"step": 3544
},
{
"epoch": 2.984848484848485,
"grad_norm": 6.203042507171631,
"learning_rate": 2.0013992356490827e-07,
"loss": 0.799166202545166,
"step": 3546
},
{
"epoch": 2.9865319865319866,
"grad_norm": 6.4163031578063965,
"learning_rate": 2.0011201695082687e-07,
"loss": 0.30166110396385193,
"step": 3548
},
{
"epoch": 2.9882154882154883,
"grad_norm": 9.541460037231445,
"learning_rate": 2.0008721082948243e-07,
"loss": 0.3377661108970642,
"step": 3550
},
{
"epoch": 2.98989898989899,
"grad_norm": 12.612906455993652,
"learning_rate": 2.0006550528634258e-07,
"loss": 0.4944566488265991,
"step": 3552
},
{
"epoch": 2.9915824915824913,
"grad_norm": 1.854871153831482,
"learning_rate": 2.00046900396192e-07,
"loss": 0.9397309422492981,
"step": 3554
},
{
"epoch": 2.993265993265993,
"grad_norm": 2.197124719619751,
"learning_rate": 2.0003139622313241e-07,
"loss": 0.7814288139343262,
"step": 3556
},
{
"epoch": 2.994949494949495,
"grad_norm": 2.3128502368927,
"learning_rate": 2.0001899282058216e-07,
"loss": 0.6661207675933838,
"step": 3558
},
{
"epoch": 2.9966329966329965,
"grad_norm": 12.201488494873047,
"learning_rate": 2.000096902312762e-07,
"loss": 0.40893661975860596,
"step": 3560
},
{
"epoch": 2.9983164983164983,
"grad_norm": 4.00324821472168,
"learning_rate": 2.0000348848726586e-07,
"loss": 0.5416642427444458,
"step": 3562
},
{
"epoch": 3.0,
"grad_norm": 11.186657905578613,
"learning_rate": 2.0000038760991877e-07,
"loss": 0.361904501914978,
"step": 3564
},
{
"epoch": 3.0,
"step": 3564,
"total_flos": 4.2988160857187287e+18,
"train_loss": 0.7978645538875685,
"train_runtime": 6311.8591,
"train_samples_per_second": 9.034,
"train_steps_per_second": 0.565
}
],
"logging_steps": 2,
"max_steps": 3564,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.2988160857187287e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}