9b-91 / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
f00e926 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 1892,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004228329809725159,
"grad_norm": 6.605544090270996,
"learning_rate": 5.263157894736842e-08,
"loss": 1.8237226009368896,
"step": 2
},
{
"epoch": 0.008456659619450317,
"grad_norm": 0.9109106659889221,
"learning_rate": 1.5789473684210527e-07,
"loss": 2.176421880722046,
"step": 4
},
{
"epoch": 0.012684989429175475,
"grad_norm": 3.9910192489624023,
"learning_rate": 2.6315789473684213e-07,
"loss": 2.1531057357788086,
"step": 6
},
{
"epoch": 0.016913319238900635,
"grad_norm": 0.9899866580963135,
"learning_rate": 3.6842105263157896e-07,
"loss": 1.9564805030822754,
"step": 8
},
{
"epoch": 0.021141649048625793,
"grad_norm": 2.9574663639068604,
"learning_rate": 4.7368421052631585e-07,
"loss": 2.021973133087158,
"step": 10
},
{
"epoch": 0.02536997885835095,
"grad_norm": 2.2470693588256836,
"learning_rate": 5.789473684210526e-07,
"loss": 1.692598581314087,
"step": 12
},
{
"epoch": 0.02959830866807611,
"grad_norm": 1.5818345546722412,
"learning_rate": 6.842105263157896e-07,
"loss": 1.6616182327270508,
"step": 14
},
{
"epoch": 0.03382663847780127,
"grad_norm": 1.9749239683151245,
"learning_rate": 7.894736842105263e-07,
"loss": 1.8213186264038086,
"step": 16
},
{
"epoch": 0.03805496828752643,
"grad_norm": 1.0426429510116577,
"learning_rate": 8.947368421052632e-07,
"loss": 1.8437881469726562,
"step": 18
},
{
"epoch": 0.042283298097251586,
"grad_norm": 15.372987747192383,
"learning_rate": 1.0000000000000002e-06,
"loss": 2.0638184547424316,
"step": 20
},
{
"epoch": 0.046511627906976744,
"grad_norm": 4.7466301918029785,
"learning_rate": 1.1052631578947369e-06,
"loss": 1.948048710823059,
"step": 22
},
{
"epoch": 0.0507399577167019,
"grad_norm": 1.4575281143188477,
"learning_rate": 1.2105263157894738e-06,
"loss": 1.8744062185287476,
"step": 24
},
{
"epoch": 0.05496828752642706,
"grad_norm": 3.336325168609619,
"learning_rate": 1.3157894736842106e-06,
"loss": 1.715809941291809,
"step": 26
},
{
"epoch": 0.05919661733615222,
"grad_norm": 0.6526132822036743,
"learning_rate": 1.4210526315789475e-06,
"loss": 1.750809907913208,
"step": 28
},
{
"epoch": 0.06342494714587738,
"grad_norm": 0.8683815002441406,
"learning_rate": 1.5263157894736844e-06,
"loss": 1.5532337427139282,
"step": 30
},
{
"epoch": 0.06765327695560254,
"grad_norm": 4.369999885559082,
"learning_rate": 1.6315789473684212e-06,
"loss": 1.2949138879776,
"step": 32
},
{
"epoch": 0.07188160676532769,
"grad_norm": 0.8993115425109863,
"learning_rate": 1.736842105263158e-06,
"loss": 1.3249835968017578,
"step": 34
},
{
"epoch": 0.07610993657505286,
"grad_norm": 1.2760223150253296,
"learning_rate": 1.8421052631578948e-06,
"loss": 1.4548490047454834,
"step": 36
},
{
"epoch": 0.080338266384778,
"grad_norm": 5.320272445678711,
"learning_rate": 1.9473684210526315e-06,
"loss": 1.3651189804077148,
"step": 38
},
{
"epoch": 0.08456659619450317,
"grad_norm": 4.702236175537109,
"learning_rate": 2.0526315789473687e-06,
"loss": 1.316279649734497,
"step": 40
},
{
"epoch": 0.08879492600422834,
"grad_norm": 5.524050712585449,
"learning_rate": 2.1578947368421054e-06,
"loss": 1.7164320945739746,
"step": 42
},
{
"epoch": 0.09302325581395349,
"grad_norm": 1.7671512365341187,
"learning_rate": 2.2631578947368426e-06,
"loss": 1.695072889328003,
"step": 44
},
{
"epoch": 0.09725158562367865,
"grad_norm": 0.7419072985649109,
"learning_rate": 2.368421052631579e-06,
"loss": 1.3463151454925537,
"step": 46
},
{
"epoch": 0.1014799154334038,
"grad_norm": 0.44772660732269287,
"learning_rate": 2.473684210526316e-06,
"loss": 1.5603983402252197,
"step": 48
},
{
"epoch": 0.10570824524312897,
"grad_norm": 0.9128488302230835,
"learning_rate": 2.578947368421053e-06,
"loss": 1.5553311109542847,
"step": 50
},
{
"epoch": 0.10993657505285412,
"grad_norm": 2.591745138168335,
"learning_rate": 2.68421052631579e-06,
"loss": 0.8912559747695923,
"step": 52
},
{
"epoch": 0.11416490486257928,
"grad_norm": 0.4358270466327667,
"learning_rate": 2.789473684210526e-06,
"loss": 1.115212321281433,
"step": 54
},
{
"epoch": 0.11839323467230443,
"grad_norm": 0.9643327593803406,
"learning_rate": 2.8947368421052634e-06,
"loss": 1.4279577732086182,
"step": 56
},
{
"epoch": 0.1226215644820296,
"grad_norm": 0.7719278931617737,
"learning_rate": 3e-06,
"loss": 1.4487831592559814,
"step": 58
},
{
"epoch": 0.12684989429175475,
"grad_norm": 0.7415221929550171,
"learning_rate": 3.1052631578947372e-06,
"loss": 1.4636942148208618,
"step": 60
},
{
"epoch": 0.13107822410147993,
"grad_norm": 1.0166652202606201,
"learning_rate": 3.210526315789474e-06,
"loss": 0.8027479648590088,
"step": 62
},
{
"epoch": 0.13530655391120508,
"grad_norm": 0.8704442381858826,
"learning_rate": 3.3157894736842107e-06,
"loss": 1.042100429534912,
"step": 64
},
{
"epoch": 0.13953488372093023,
"grad_norm": 0.5612062811851501,
"learning_rate": 3.421052631578948e-06,
"loss": 1.3720247745513916,
"step": 66
},
{
"epoch": 0.14376321353065538,
"grad_norm": 0.9619214534759521,
"learning_rate": 3.5263157894736846e-06,
"loss": 1.4121863842010498,
"step": 68
},
{
"epoch": 0.14799154334038056,
"grad_norm": 0.7827504277229309,
"learning_rate": 3.6315789473684217e-06,
"loss": 1.409840703010559,
"step": 70
},
{
"epoch": 0.1522198731501057,
"grad_norm": 0.7777084708213806,
"learning_rate": 3.736842105263158e-06,
"loss": 1.3885422945022583,
"step": 72
},
{
"epoch": 0.15644820295983086,
"grad_norm": 0.7667552828788757,
"learning_rate": 3.842105263157895e-06,
"loss": 0.9778481125831604,
"step": 74
},
{
"epoch": 0.160676532769556,
"grad_norm": 0.7666372060775757,
"learning_rate": 3.947368421052632e-06,
"loss": 0.8655365705490112,
"step": 76
},
{
"epoch": 0.1649048625792812,
"grad_norm": 1.127411961555481,
"learning_rate": 4.052631578947368e-06,
"loss": 1.3328880071640015,
"step": 78
},
{
"epoch": 0.16913319238900634,
"grad_norm": 1.8701919317245483,
"learning_rate": 4.157894736842106e-06,
"loss": 1.1148747205734253,
"step": 80
},
{
"epoch": 0.1733615221987315,
"grad_norm": 0.7047215104103088,
"learning_rate": 4.2631578947368425e-06,
"loss": 1.1087937355041504,
"step": 82
},
{
"epoch": 0.17758985200845667,
"grad_norm": 1.534998893737793,
"learning_rate": 4.368421052631579e-06,
"loss": 0.9685766696929932,
"step": 84
},
{
"epoch": 0.18181818181818182,
"grad_norm": 1.2067896127700806,
"learning_rate": 4.473684210526316e-06,
"loss": 1.7643671035766602,
"step": 86
},
{
"epoch": 0.18604651162790697,
"grad_norm": 1.5933588743209839,
"learning_rate": 4.578947368421053e-06,
"loss": 1.3110942840576172,
"step": 88
},
{
"epoch": 0.19027484143763213,
"grad_norm": 4.7901225090026855,
"learning_rate": 4.68421052631579e-06,
"loss": 1.0165361166000366,
"step": 90
},
{
"epoch": 0.1945031712473573,
"grad_norm": 0.7400741577148438,
"learning_rate": 4.789473684210527e-06,
"loss": 1.3473522663116455,
"step": 92
},
{
"epoch": 0.19873150105708245,
"grad_norm": 1.6431527137756348,
"learning_rate": 4.894736842105264e-06,
"loss": 0.9921259880065918,
"step": 94
},
{
"epoch": 0.2029598308668076,
"grad_norm": 1.1242965459823608,
"learning_rate": 5e-06,
"loss": 1.3252302408218384,
"step": 96
},
{
"epoch": 0.20718816067653276,
"grad_norm": 1.4306470155715942,
"learning_rate": 4.999986246423023e-06,
"loss": 1.4119060039520264,
"step": 98
},
{
"epoch": 0.21141649048625794,
"grad_norm": 0.6519229412078857,
"learning_rate": 4.999944985860234e-06,
"loss": 1.3824262619018555,
"step": 100
},
{
"epoch": 0.2156448202959831,
"grad_norm": 0.9801005721092224,
"learning_rate": 4.9998762188160604e-06,
"loss": 1.2583341598510742,
"step": 102
},
{
"epoch": 0.21987315010570824,
"grad_norm": 0.6960574388504028,
"learning_rate": 4.999779946131206e-06,
"loss": 1.3352376222610474,
"step": 104
},
{
"epoch": 0.22410147991543342,
"grad_norm": 0.937359631061554,
"learning_rate": 4.9996561689826455e-06,
"loss": 1.3265419006347656,
"step": 106
},
{
"epoch": 0.22832980972515857,
"grad_norm": 0.973751962184906,
"learning_rate": 4.999504888883601e-06,
"loss": 1.0313334465026855,
"step": 108
},
{
"epoch": 0.23255813953488372,
"grad_norm": 0.8326078653335571,
"learning_rate": 4.999326107683535e-06,
"loss": 1.3112177848815918,
"step": 110
},
{
"epoch": 0.23678646934460887,
"grad_norm": 1.3524088859558105,
"learning_rate": 4.999119827568119e-06,
"loss": 1.379159688949585,
"step": 112
},
{
"epoch": 0.24101479915433405,
"grad_norm": 0.8450507521629333,
"learning_rate": 4.9988860510592085e-06,
"loss": 1.4243919849395752,
"step": 114
},
{
"epoch": 0.2452431289640592,
"grad_norm": 1.4355442523956299,
"learning_rate": 4.998624781014819e-06,
"loss": 1.3441710472106934,
"step": 116
},
{
"epoch": 0.24947145877378435,
"grad_norm": 1.2588502168655396,
"learning_rate": 4.998336020629077e-06,
"loss": 1.3617056608200073,
"step": 118
},
{
"epoch": 0.2536997885835095,
"grad_norm": 1.3343617916107178,
"learning_rate": 4.998019773432198e-06,
"loss": 0.8026182055473328,
"step": 120
},
{
"epoch": 0.25792811839323465,
"grad_norm": 0.6089492440223694,
"learning_rate": 4.997676043290429e-06,
"loss": 1.320847988128662,
"step": 122
},
{
"epoch": 0.26215644820295986,
"grad_norm": 0.6601752042770386,
"learning_rate": 4.997304834406011e-06,
"loss": 1.1560726165771484,
"step": 124
},
{
"epoch": 0.266384778012685,
"grad_norm": 0.9852200746536255,
"learning_rate": 4.9969061513171185e-06,
"loss": 1.3645535707473755,
"step": 126
},
{
"epoch": 0.27061310782241016,
"grad_norm": 1.04742431640625,
"learning_rate": 4.996479998897815e-06,
"loss": 1.0370509624481201,
"step": 128
},
{
"epoch": 0.2748414376321353,
"grad_norm": 0.768661379814148,
"learning_rate": 4.996026382357985e-06,
"loss": 0.976492166519165,
"step": 130
},
{
"epoch": 0.27906976744186046,
"grad_norm": 1.244371771812439,
"learning_rate": 4.995545307243273e-06,
"loss": 1.290363073348999,
"step": 132
},
{
"epoch": 0.2832980972515856,
"grad_norm": 1.2053449153900146,
"learning_rate": 4.995036779435014e-06,
"loss": 0.8751378655433655,
"step": 134
},
{
"epoch": 0.28752642706131076,
"grad_norm": 2.1075494289398193,
"learning_rate": 4.994500805150167e-06,
"loss": 1.123706579208374,
"step": 136
},
{
"epoch": 0.2917547568710359,
"grad_norm": 2.0092146396636963,
"learning_rate": 4.993937390941231e-06,
"loss": 1.4683767557144165,
"step": 138
},
{
"epoch": 0.2959830866807611,
"grad_norm": 1.5879631042480469,
"learning_rate": 4.9933465436961705e-06,
"loss": 0.9096964597702026,
"step": 140
},
{
"epoch": 0.30021141649048627,
"grad_norm": 1.2290595769882202,
"learning_rate": 4.992728270638333e-06,
"loss": 1.5735292434692383,
"step": 142
},
{
"epoch": 0.3044397463002114,
"grad_norm": 16.471097946166992,
"learning_rate": 4.992082579326354e-06,
"loss": 1.1104016304016113,
"step": 144
},
{
"epoch": 0.3086680761099366,
"grad_norm": 3.488492250442505,
"learning_rate": 4.9914094776540676e-06,
"loss": 0.6830090284347534,
"step": 146
},
{
"epoch": 0.3128964059196617,
"grad_norm": 1.9017225503921509,
"learning_rate": 4.990708973850415e-06,
"loss": 1.2578611373901367,
"step": 148
},
{
"epoch": 0.3171247357293869,
"grad_norm": 0.8831533789634705,
"learning_rate": 4.989981076479334e-06,
"loss": 1.2861902713775635,
"step": 150
},
{
"epoch": 0.321353065539112,
"grad_norm": 1.4398751258850098,
"learning_rate": 4.989225794439665e-06,
"loss": 1.2161321640014648,
"step": 152
},
{
"epoch": 0.32558139534883723,
"grad_norm": 2.2701990604400635,
"learning_rate": 4.9884431369650316e-06,
"loss": 0.7495906949043274,
"step": 154
},
{
"epoch": 0.3298097251585624,
"grad_norm": 1.7071163654327393,
"learning_rate": 4.987633113623737e-06,
"loss": 1.2624861001968384,
"step": 156
},
{
"epoch": 0.33403805496828753,
"grad_norm": 0.7767173647880554,
"learning_rate": 4.986795734318643e-06,
"loss": 1.2871098518371582,
"step": 158
},
{
"epoch": 0.3382663847780127,
"grad_norm": 3.3821675777435303,
"learning_rate": 4.985931009287047e-06,
"loss": 1.279846429824829,
"step": 160
},
{
"epoch": 0.34249471458773784,
"grad_norm": 1.1429699659347534,
"learning_rate": 4.98503894910056e-06,
"loss": 1.1057888269424438,
"step": 162
},
{
"epoch": 0.346723044397463,
"grad_norm": 1.2170449495315552,
"learning_rate": 4.9841195646649764e-06,
"loss": 1.0053894519805908,
"step": 164
},
{
"epoch": 0.35095137420718814,
"grad_norm": 0.7444745302200317,
"learning_rate": 4.98317286722014e-06,
"loss": 0.8725204467773438,
"step": 166
},
{
"epoch": 0.35517970401691334,
"grad_norm": 1.1420894861221313,
"learning_rate": 4.982198868339808e-06,
"loss": 1.1899808645248413,
"step": 168
},
{
"epoch": 0.3594080338266385,
"grad_norm": 0.6692090034484863,
"learning_rate": 4.981197579931507e-06,
"loss": 1.2234405279159546,
"step": 170
},
{
"epoch": 0.36363636363636365,
"grad_norm": 0.6509742736816406,
"learning_rate": 4.980169014236391e-06,
"loss": 1.050593614578247,
"step": 172
},
{
"epoch": 0.3678646934460888,
"grad_norm": 0.659625232219696,
"learning_rate": 4.979113183829088e-06,
"loss": 1.3807719945907593,
"step": 174
},
{
"epoch": 0.37209302325581395,
"grad_norm": 0.8343127369880676,
"learning_rate": 4.97803010161755e-06,
"loss": 1.068203330039978,
"step": 176
},
{
"epoch": 0.3763213530655391,
"grad_norm": 1.0895072221755981,
"learning_rate": 4.976919780842892e-06,
"loss": 0.8895647525787354,
"step": 178
},
{
"epoch": 0.38054968287526425,
"grad_norm": 0.4808787703514099,
"learning_rate": 4.97578223507923e-06,
"loss": 1.3076379299163818,
"step": 180
},
{
"epoch": 0.38477801268498946,
"grad_norm": 1.1349347829818726,
"learning_rate": 4.97461747823352e-06,
"loss": 1.547876238822937,
"step": 182
},
{
"epoch": 0.3890063424947146,
"grad_norm": 1.6290419101715088,
"learning_rate": 4.973425524545382e-06,
"loss": 1.3527616262435913,
"step": 184
},
{
"epoch": 0.39323467230443976,
"grad_norm": 1.678579568862915,
"learning_rate": 4.972206388586927e-06,
"loss": 1.102654218673706,
"step": 186
},
{
"epoch": 0.3974630021141649,
"grad_norm": 1.367773175239563,
"learning_rate": 4.970960085262584e-06,
"loss": 0.9921371340751648,
"step": 188
},
{
"epoch": 0.40169133192389006,
"grad_norm": 1.1447407007217407,
"learning_rate": 4.969686629808911e-06,
"loss": 1.0145394802093506,
"step": 190
},
{
"epoch": 0.4059196617336152,
"grad_norm": 0.8305537104606628,
"learning_rate": 4.9683860377944125e-06,
"loss": 1.3157576322555542,
"step": 192
},
{
"epoch": 0.41014799154334036,
"grad_norm": 0.30826497077941895,
"learning_rate": 4.967058325119348e-06,
"loss": 1.023323655128479,
"step": 194
},
{
"epoch": 0.4143763213530655,
"grad_norm": 1.3932182788848877,
"learning_rate": 4.965703508015539e-06,
"loss": 1.2941372394561768,
"step": 196
},
{
"epoch": 0.4186046511627907,
"grad_norm": 0.7291075587272644,
"learning_rate": 4.964321603046169e-06,
"loss": 1.0717015266418457,
"step": 198
},
{
"epoch": 0.42283298097251587,
"grad_norm": 0.5842322707176208,
"learning_rate": 4.962912627105581e-06,
"loss": 1.1873562335968018,
"step": 200
},
{
"epoch": 0.427061310782241,
"grad_norm": 0.6558592319488525,
"learning_rate": 4.961476597419072e-06,
"loss": 0.5549638867378235,
"step": 202
},
{
"epoch": 0.4312896405919662,
"grad_norm": 0.5496861934661865,
"learning_rate": 4.960013531542681e-06,
"loss": 1.251203179359436,
"step": 204
},
{
"epoch": 0.4355179704016913,
"grad_norm": 1.2829310894012451,
"learning_rate": 4.958523447362978e-06,
"loss": 1.3057016134262085,
"step": 206
},
{
"epoch": 0.4397463002114165,
"grad_norm": 1.240744709968567,
"learning_rate": 4.95700636309684e-06,
"loss": 0.773059606552124,
"step": 208
},
{
"epoch": 0.4439746300211416,
"grad_norm": 0.6856869459152222,
"learning_rate": 4.955462297291231e-06,
"loss": 1.3060951232910156,
"step": 210
},
{
"epoch": 0.44820295983086683,
"grad_norm": 1.0836693048477173,
"learning_rate": 4.953891268822977e-06,
"loss": 1.2806111574172974,
"step": 212
},
{
"epoch": 0.452431289640592,
"grad_norm": 0.8455312252044678,
"learning_rate": 4.952293296898531e-06,
"loss": 1.4302403926849365,
"step": 214
},
{
"epoch": 0.45665961945031713,
"grad_norm": 0.5475296974182129,
"learning_rate": 4.9506684010537425e-06,
"loss": 0.6154606938362122,
"step": 216
},
{
"epoch": 0.4608879492600423,
"grad_norm": 2.704235315322876,
"learning_rate": 4.949016601153615e-06,
"loss": 1.0028847455978394,
"step": 218
},
{
"epoch": 0.46511627906976744,
"grad_norm": 0.5048341751098633,
"learning_rate": 4.947337917392068e-06,
"loss": 1.2792624235153198,
"step": 220
},
{
"epoch": 0.4693446088794926,
"grad_norm": 1.052980899810791,
"learning_rate": 4.9456323702916834e-06,
"loss": 1.2493329048156738,
"step": 222
},
{
"epoch": 0.47357293868921774,
"grad_norm": 0.7700513601303101,
"learning_rate": 4.94389998070346e-06,
"loss": 1.3523839712142944,
"step": 224
},
{
"epoch": 0.47780126849894294,
"grad_norm": 0.4727860689163208,
"learning_rate": 4.9421407698065546e-06,
"loss": 1.262749433517456,
"step": 226
},
{
"epoch": 0.4820295983086681,
"grad_norm": 0.7402790188789368,
"learning_rate": 4.940354759108031e-06,
"loss": 1.2572187185287476,
"step": 228
},
{
"epoch": 0.48625792811839325,
"grad_norm": 0.5866014361381531,
"learning_rate": 4.938541970442585e-06,
"loss": 0.9033302068710327,
"step": 230
},
{
"epoch": 0.4904862579281184,
"grad_norm": 0.6419193148612976,
"learning_rate": 4.9367024259722866e-06,
"loss": 1.2711232900619507,
"step": 232
},
{
"epoch": 0.49471458773784355,
"grad_norm": 1.1681197881698608,
"learning_rate": 4.934836148186306e-06,
"loss": 0.9501933455467224,
"step": 234
},
{
"epoch": 0.4989429175475687,
"grad_norm": 7.673317909240723,
"learning_rate": 4.93294315990064e-06,
"loss": 0.8862608075141907,
"step": 236
},
{
"epoch": 0.5031712473572939,
"grad_norm": 1.019538402557373,
"learning_rate": 4.93102348425783e-06,
"loss": 0.8333485722541809,
"step": 238
},
{
"epoch": 0.507399577167019,
"grad_norm": 0.6142158508300781,
"learning_rate": 4.9290771447266815e-06,
"loss": 0.8960846066474915,
"step": 240
},
{
"epoch": 0.5116279069767442,
"grad_norm": 0.829379677772522,
"learning_rate": 4.927104165101979e-06,
"loss": 1.3168963193893433,
"step": 242
},
{
"epoch": 0.5158562367864693,
"grad_norm": 1.2920678853988647,
"learning_rate": 4.925104569504188e-06,
"loss": 1.365329623222351,
"step": 244
},
{
"epoch": 0.5200845665961945,
"grad_norm": 1.2702393531799316,
"learning_rate": 4.923078382379172e-06,
"loss": 1.2854634523391724,
"step": 246
},
{
"epoch": 0.5243128964059197,
"grad_norm": 0.618291974067688,
"learning_rate": 4.921025628497879e-06,
"loss": 1.2556568384170532,
"step": 248
},
{
"epoch": 0.5285412262156448,
"grad_norm": 1.7209643125534058,
"learning_rate": 4.918946332956052e-06,
"loss": 1.1718345880508423,
"step": 250
},
{
"epoch": 0.53276955602537,
"grad_norm": 2.5865588188171387,
"learning_rate": 4.916840521173914e-06,
"loss": 1.1015582084655762,
"step": 252
},
{
"epoch": 0.5369978858350951,
"grad_norm": 1.6121222972869873,
"learning_rate": 4.914708218895861e-06,
"loss": 1.742082118988037,
"step": 254
},
{
"epoch": 0.5412262156448203,
"grad_norm": 0.8724984526634216,
"learning_rate": 4.912549452190142e-06,
"loss": 1.2257004976272583,
"step": 256
},
{
"epoch": 0.5454545454545454,
"grad_norm": 0.8018029928207397,
"learning_rate": 4.9103642474485506e-06,
"loss": 1.404122233390808,
"step": 258
},
{
"epoch": 0.5496828752642706,
"grad_norm": 0.7809526920318604,
"learning_rate": 4.908152631386091e-06,
"loss": 1.0011447668075562,
"step": 260
},
{
"epoch": 0.5539112050739958,
"grad_norm": 0.903286874294281,
"learning_rate": 4.905914631040658e-06,
"loss": 1.2129504680633545,
"step": 262
},
{
"epoch": 0.5581395348837209,
"grad_norm": 0.8726590871810913,
"learning_rate": 4.9036502737727055e-06,
"loss": 1.0686239004135132,
"step": 264
},
{
"epoch": 0.5623678646934461,
"grad_norm": 1.0852851867675781,
"learning_rate": 4.901359587264911e-06,
"loss": 1.510224461555481,
"step": 266
},
{
"epoch": 0.5665961945031712,
"grad_norm": 1.2106469869613647,
"learning_rate": 4.899042599521836e-06,
"loss": 0.5239309072494507,
"step": 268
},
{
"epoch": 0.5708245243128964,
"grad_norm": 1.205407738685608,
"learning_rate": 4.8966993388695886e-06,
"loss": 1.0271662473678589,
"step": 270
},
{
"epoch": 0.5750528541226215,
"grad_norm": 2.216895580291748,
"learning_rate": 4.894329833955471e-06,
"loss": 1.2076795101165771,
"step": 272
},
{
"epoch": 0.5792811839323467,
"grad_norm": 1.1767100095748901,
"learning_rate": 4.891934113747631e-06,
"loss": 0.9579524993896484,
"step": 274
},
{
"epoch": 0.5835095137420718,
"grad_norm": 1.0378605127334595,
"learning_rate": 4.8895122075347135e-06,
"loss": 0.9333509206771851,
"step": 276
},
{
"epoch": 0.587737843551797,
"grad_norm": 0.2679741382598877,
"learning_rate": 4.887064144925493e-06,
"loss": 0.8527027368545532,
"step": 278
},
{
"epoch": 0.5919661733615222,
"grad_norm": 0.6713143587112427,
"learning_rate": 4.8845899558485185e-06,
"loss": 1.2377649545669556,
"step": 280
},
{
"epoch": 0.5961945031712473,
"grad_norm": 0.8605502247810364,
"learning_rate": 4.8820896705517465e-06,
"loss": 1.4566680192947388,
"step": 282
},
{
"epoch": 0.6004228329809725,
"grad_norm": 0.1871010959148407,
"learning_rate": 4.879563319602169e-06,
"loss": 0.9204542636871338,
"step": 284
},
{
"epoch": 0.6046511627906976,
"grad_norm": 1.0409096479415894,
"learning_rate": 4.87701093388544e-06,
"loss": 1.2875986099243164,
"step": 286
},
{
"epoch": 0.6088794926004228,
"grad_norm": 1.0819401741027832,
"learning_rate": 4.874432544605502e-06,
"loss": 0.4104747176170349,
"step": 288
},
{
"epoch": 0.6131078224101479,
"grad_norm": 1.2147349119186401,
"learning_rate": 4.871828183284199e-06,
"loss": 0.9401180744171143,
"step": 290
},
{
"epoch": 0.6173361522198731,
"grad_norm": 0.9073833227157593,
"learning_rate": 4.869197881760896e-06,
"loss": 0.881571888923645,
"step": 292
},
{
"epoch": 0.6215644820295984,
"grad_norm": 1.9964375495910645,
"learning_rate": 4.866541672192082e-06,
"loss": 0.7248478531837463,
"step": 294
},
{
"epoch": 0.6257928118393234,
"grad_norm": 0.8532471656799316,
"learning_rate": 4.863859587050991e-06,
"loss": 0.7459216117858887,
"step": 296
},
{
"epoch": 0.6300211416490487,
"grad_norm": 1.436072587966919,
"learning_rate": 4.861151659127188e-06,
"loss": 1.300452709197998,
"step": 298
},
{
"epoch": 0.6342494714587738,
"grad_norm": 0.7239471673965454,
"learning_rate": 4.85841792152618e-06,
"loss": 1.2527419328689575,
"step": 300
},
{
"epoch": 0.638477801268499,
"grad_norm": 0.6468306183815002,
"learning_rate": 4.85565840766901e-06,
"loss": 0.6989489197731018,
"step": 302
},
{
"epoch": 0.642706131078224,
"grad_norm": 0.8453686237335205,
"learning_rate": 4.852873151291841e-06,
"loss": 0.8262038230895996,
"step": 304
},
{
"epoch": 0.6469344608879493,
"grad_norm": 1.1085318326950073,
"learning_rate": 4.850062186445552e-06,
"loss": 0.9046404361724854,
"step": 306
},
{
"epoch": 0.6511627906976745,
"grad_norm": 1.146599531173706,
"learning_rate": 4.847225547495318e-06,
"loss": 1.2455283403396606,
"step": 308
},
{
"epoch": 0.6553911205073996,
"grad_norm": 1.3924742937088013,
"learning_rate": 4.84436326912019e-06,
"loss": 1.2206207513809204,
"step": 310
},
{
"epoch": 0.6596194503171248,
"grad_norm": 0.65780109167099,
"learning_rate": 4.84147538631267e-06,
"loss": 1.2247376441955566,
"step": 312
},
{
"epoch": 0.6638477801268499,
"grad_norm": 1.4019877910614014,
"learning_rate": 4.8385619343782865e-06,
"loss": 1.2421458959579468,
"step": 314
},
{
"epoch": 0.6680761099365751,
"grad_norm": 0.5540094375610352,
"learning_rate": 4.835622948935159e-06,
"loss": 1.0704643726348877,
"step": 316
},
{
"epoch": 0.6723044397463002,
"grad_norm": 4.01638126373291,
"learning_rate": 4.832658465913566e-06,
"loss": 0.7506370544433594,
"step": 318
},
{
"epoch": 0.6765327695560254,
"grad_norm": 0.8524858355522156,
"learning_rate": 4.829668521555503e-06,
"loss": 1.2541189193725586,
"step": 320
},
{
"epoch": 0.6807610993657506,
"grad_norm": 0.789856493473053,
"learning_rate": 4.826653152414242e-06,
"loss": 1.31632661819458,
"step": 322
},
{
"epoch": 0.6849894291754757,
"grad_norm": 0.6171086430549622,
"learning_rate": 4.823612395353881e-06,
"loss": 1.0809494256973267,
"step": 324
},
{
"epoch": 0.6892177589852009,
"grad_norm": 0.7493656873703003,
"learning_rate": 4.820546287548897e-06,
"loss": 1.2742823362350464,
"step": 326
},
{
"epoch": 0.693446088794926,
"grad_norm": 1.6876893043518066,
"learning_rate": 4.81745486648369e-06,
"loss": 1.1811712980270386,
"step": 328
},
{
"epoch": 0.6976744186046512,
"grad_norm": 3.2828762531280518,
"learning_rate": 4.814338169952125e-06,
"loss": 0.8377833366394043,
"step": 330
},
{
"epoch": 0.7019027484143763,
"grad_norm": 1.0052536725997925,
"learning_rate": 4.811196236057068e-06,
"loss": 1.3030086755752563,
"step": 332
},
{
"epoch": 0.7061310782241015,
"grad_norm": 1.0873143672943115,
"learning_rate": 4.808029103209925e-06,
"loss": 1.2012561559677124,
"step": 334
},
{
"epoch": 0.7103594080338267,
"grad_norm": 0.6253702640533447,
"learning_rate": 4.804836810130165e-06,
"loss": 1.2230525016784668,
"step": 336
},
{
"epoch": 0.7145877378435518,
"grad_norm": 0.9542647004127502,
"learning_rate": 4.801619395844855e-06,
"loss": 1.3592028617858887,
"step": 338
},
{
"epoch": 0.718816067653277,
"grad_norm": 2.7538552284240723,
"learning_rate": 4.798376899688178e-06,
"loss": 1.2697663307189941,
"step": 340
},
{
"epoch": 0.7230443974630021,
"grad_norm": 1.300477147102356,
"learning_rate": 4.79510936130095e-06,
"loss": 1.1157526969909668,
"step": 342
},
{
"epoch": 0.7272727272727273,
"grad_norm": 1.0564080476760864,
"learning_rate": 4.791816820630143e-06,
"loss": 0.9000387191772461,
"step": 344
},
{
"epoch": 0.7315010570824524,
"grad_norm": 6.434342861175537,
"learning_rate": 4.788499317928387e-06,
"loss": 0.8705897927284241,
"step": 346
},
{
"epoch": 0.7357293868921776,
"grad_norm": 1.8239296674728394,
"learning_rate": 4.785156893753487e-06,
"loss": 0.9839805364608765,
"step": 348
},
{
"epoch": 0.7399577167019028,
"grad_norm": 1.4335103034973145,
"learning_rate": 4.781789588967922e-06,
"loss": 1.3093687295913696,
"step": 350
},
{
"epoch": 0.7441860465116279,
"grad_norm": 1.6742173433303833,
"learning_rate": 4.778397444738344e-06,
"loss": 1.1608158349990845,
"step": 352
},
{
"epoch": 0.7484143763213531,
"grad_norm": 7.366899013519287,
"learning_rate": 4.774980502535081e-06,
"loss": 0.7054665088653564,
"step": 354
},
{
"epoch": 0.7526427061310782,
"grad_norm": 1.3835808038711548,
"learning_rate": 4.771538804131623e-06,
"loss": 1.0112260580062866,
"step": 356
},
{
"epoch": 0.7568710359408034,
"grad_norm": 0.7247501015663147,
"learning_rate": 4.7680723916041145e-06,
"loss": 1.21829092502594,
"step": 358
},
{
"epoch": 0.7610993657505285,
"grad_norm": 1.7645400762557983,
"learning_rate": 4.764581307330844e-06,
"loss": 0.8012920618057251,
"step": 360
},
{
"epoch": 0.7653276955602537,
"grad_norm": 0.9456394910812378,
"learning_rate": 4.761065593991716e-06,
"loss": 1.0871394872665405,
"step": 362
},
{
"epoch": 0.7695560253699789,
"grad_norm": 1.7007086277008057,
"learning_rate": 4.757525294567743e-06,
"loss": 1.0711324214935303,
"step": 364
},
{
"epoch": 0.773784355179704,
"grad_norm": 1.7648683786392212,
"learning_rate": 4.753960452340503e-06,
"loss": 1.2688275575637817,
"step": 366
},
{
"epoch": 0.7780126849894292,
"grad_norm": 2.573831796646118,
"learning_rate": 4.750371110891628e-06,
"loss": 1.2218682765960693,
"step": 368
},
{
"epoch": 0.7822410147991543,
"grad_norm": 1.4216328859329224,
"learning_rate": 4.746757314102258e-06,
"loss": 0.882118821144104,
"step": 370
},
{
"epoch": 0.7864693446088795,
"grad_norm": 0.9801198840141296,
"learning_rate": 4.74311910615251e-06,
"loss": 1.1977894306182861,
"step": 372
},
{
"epoch": 0.7906976744186046,
"grad_norm": 0.9408032894134521,
"learning_rate": 4.739456531520939e-06,
"loss": 1.218635082244873,
"step": 374
},
{
"epoch": 0.7949260042283298,
"grad_norm": 0.8495731949806213,
"learning_rate": 4.735769634983991e-06,
"loss": 1.3023980855941772,
"step": 376
},
{
"epoch": 0.7991543340380549,
"grad_norm": 0.7672894597053528,
"learning_rate": 4.732058461615457e-06,
"loss": 0.9807602763175964,
"step": 378
},
{
"epoch": 0.8033826638477801,
"grad_norm": 1.1932101249694824,
"learning_rate": 4.728323056785922e-06,
"loss": 1.3166661262512207,
"step": 380
},
{
"epoch": 0.8076109936575053,
"grad_norm": 0.8716092109680176,
"learning_rate": 4.724563466162212e-06,
"loss": 1.1811023950576782,
"step": 382
},
{
"epoch": 0.8118393234672304,
"grad_norm": 0.7233268618583679,
"learning_rate": 4.7207797357068325e-06,
"loss": 0.9482329487800598,
"step": 384
},
{
"epoch": 0.8160676532769556,
"grad_norm": 1.9323195219039917,
"learning_rate": 4.716971911677408e-06,
"loss": 0.9550711512565613,
"step": 386
},
{
"epoch": 0.8202959830866807,
"grad_norm": 2.0255048274993896,
"learning_rate": 4.713140040626116e-06,
"loss": 1.4793070554733276,
"step": 388
},
{
"epoch": 0.8245243128964059,
"grad_norm": 3.492385149002075,
"learning_rate": 4.709284169399122e-06,
"loss": 1.1643321514129639,
"step": 390
},
{
"epoch": 0.828752642706131,
"grad_norm": 1.8576074838638306,
"learning_rate": 4.7054043451359995e-06,
"loss": 0.9359977841377258,
"step": 392
},
{
"epoch": 0.8329809725158562,
"grad_norm": 2.6958370208740234,
"learning_rate": 4.70150061526916e-06,
"loss": 1.2630811929702759,
"step": 394
},
{
"epoch": 0.8372093023255814,
"grad_norm": 0.2705208957195282,
"learning_rate": 4.6975730275232675e-06,
"loss": 0.7544412612915039,
"step": 396
},
{
"epoch": 0.8414376321353065,
"grad_norm": 0.574475109577179,
"learning_rate": 4.693621629914662e-06,
"loss": 0.6635357737541199,
"step": 398
},
{
"epoch": 0.8456659619450317,
"grad_norm": 3.4476590156555176,
"learning_rate": 4.689646470750765e-06,
"loss": 1.1272733211517334,
"step": 400
},
{
"epoch": 0.8498942917547568,
"grad_norm": 1.7214937210083008,
"learning_rate": 4.685647598629496e-06,
"loss": 0.9259978532791138,
"step": 402
},
{
"epoch": 0.854122621564482,
"grad_norm": 0.8425617218017578,
"learning_rate": 4.681625062438672e-06,
"loss": 0.8047032952308655,
"step": 404
},
{
"epoch": 0.8583509513742071,
"grad_norm": 0.5278515219688416,
"learning_rate": 4.677578911355415e-06,
"loss": 0.9893290996551514,
"step": 406
},
{
"epoch": 0.8625792811839323,
"grad_norm": 1.7615666389465332,
"learning_rate": 4.673509194845547e-06,
"loss": 1.0258289575576782,
"step": 408
},
{
"epoch": 0.8668076109936576,
"grad_norm": 4.036815643310547,
"learning_rate": 4.669415962662987e-06,
"loss": 0.8945714235305786,
"step": 410
},
{
"epoch": 0.8710359408033826,
"grad_norm": 1.1441407203674316,
"learning_rate": 4.665299264849144e-06,
"loss": 1.1385798454284668,
"step": 412
},
{
"epoch": 0.8752642706131079,
"grad_norm": 0.9861418604850769,
"learning_rate": 4.661159151732302e-06,
"loss": 1.000221848487854,
"step": 414
},
{
"epoch": 0.879492600422833,
"grad_norm": 1.6568901538848877,
"learning_rate": 4.656995673927008e-06,
"loss": 1.1056493520736694,
"step": 416
},
{
"epoch": 0.8837209302325582,
"grad_norm": 1.9883769750595093,
"learning_rate": 4.6528088823334485e-06,
"loss": 1.2290613651275635,
"step": 418
},
{
"epoch": 0.8879492600422833,
"grad_norm": 2.026019334793091,
"learning_rate": 4.648598828136836e-06,
"loss": 1.2732092142105103,
"step": 420
},
{
"epoch": 0.8921775898520085,
"grad_norm": 2.332921266555786,
"learning_rate": 4.644365562806772e-06,
"loss": 0.9564085006713867,
"step": 422
},
{
"epoch": 0.8964059196617337,
"grad_norm": 1.1014127731323242,
"learning_rate": 4.6401091380966276e-06,
"loss": 1.2294795513153076,
"step": 424
},
{
"epoch": 0.9006342494714588,
"grad_norm": 0.6077444553375244,
"learning_rate": 4.635829606042904e-06,
"loss": 1.0533849000930786,
"step": 426
},
{
"epoch": 0.904862579281184,
"grad_norm": 1.495557427406311,
"learning_rate": 4.6315270189645994e-06,
"loss": 0.870442807674408,
"step": 428
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.9370867609977722,
"learning_rate": 4.627201429462571e-06,
"loss": 1.0831764936447144,
"step": 430
},
{
"epoch": 0.9133192389006343,
"grad_norm": 1.3776339292526245,
"learning_rate": 4.622852890418887e-06,
"loss": 1.2492940425872803,
"step": 432
},
{
"epoch": 0.9175475687103594,
"grad_norm": 1.6412044763565063,
"learning_rate": 4.618481454996184e-06,
"loss": 0.5277518033981323,
"step": 434
},
{
"epoch": 0.9217758985200846,
"grad_norm": 1.8212065696716309,
"learning_rate": 4.614087176637018e-06,
"loss": 0.39484813809394836,
"step": 436
},
{
"epoch": 0.9260042283298098,
"grad_norm": 0.5595113039016724,
"learning_rate": 4.6096701090632064e-06,
"loss": 0.9221642017364502,
"step": 438
},
{
"epoch": 0.9302325581395349,
"grad_norm": 0.5767474174499512,
"learning_rate": 4.605230306275174e-06,
"loss": 1.1318392753601074,
"step": 440
},
{
"epoch": 0.9344608879492601,
"grad_norm": 7.54542875289917,
"learning_rate": 4.600767822551295e-06,
"loss": 0.7188118100166321,
"step": 442
},
{
"epoch": 0.9386892177589852,
"grad_norm": 1.233697772026062,
"learning_rate": 4.596282712447225e-06,
"loss": 1.243707299232483,
"step": 444
},
{
"epoch": 0.9429175475687104,
"grad_norm": 0.9562466144561768,
"learning_rate": 4.591775030795238e-06,
"loss": 1.0868984460830688,
"step": 446
},
{
"epoch": 0.9471458773784355,
"grad_norm": 1.5018267631530762,
"learning_rate": 4.587244832703551e-06,
"loss": 1.1005150079727173,
"step": 448
},
{
"epoch": 0.9513742071881607,
"grad_norm": 1.9610888957977295,
"learning_rate": 4.582692173555658e-06,
"loss": 0.7627214193344116,
"step": 450
},
{
"epoch": 0.9556025369978859,
"grad_norm": 0.661365807056427,
"learning_rate": 4.5781171090096456e-06,
"loss": 1.0607075691223145,
"step": 452
},
{
"epoch": 0.959830866807611,
"grad_norm": 1.50721275806427,
"learning_rate": 4.573519694997514e-06,
"loss": 1.3157492876052856,
"step": 454
},
{
"epoch": 0.9640591966173362,
"grad_norm": 5.258788585662842,
"learning_rate": 4.568899987724499e-06,
"loss": 0.6505974531173706,
"step": 456
},
{
"epoch": 0.9682875264270613,
"grad_norm": 2.9857466220855713,
"learning_rate": 4.564258043668378e-06,
"loss": 0.8859183192253113,
"step": 458
},
{
"epoch": 0.9725158562367865,
"grad_norm": 1.149194598197937,
"learning_rate": 4.559593919578779e-06,
"loss": 1.232746958732605,
"step": 460
},
{
"epoch": 0.9767441860465116,
"grad_norm": 1.1455705165863037,
"learning_rate": 4.554907672476498e-06,
"loss": 1.2240073680877686,
"step": 462
},
{
"epoch": 0.9809725158562368,
"grad_norm": 2.103093385696411,
"learning_rate": 4.550199359652783e-06,
"loss": 0.6853596568107605,
"step": 464
},
{
"epoch": 0.985200845665962,
"grad_norm": 2.1419098377227783,
"learning_rate": 4.5454690386686525e-06,
"loss": 1.2064260244369507,
"step": 466
},
{
"epoch": 0.9894291754756871,
"grad_norm": 2.1042675971984863,
"learning_rate": 4.540716767354182e-06,
"loss": 0.9678149819374084,
"step": 468
},
{
"epoch": 0.9936575052854123,
"grad_norm": 1.194627046585083,
"learning_rate": 4.5359426038077955e-06,
"loss": 1.2596162557601929,
"step": 470
},
{
"epoch": 0.9978858350951374,
"grad_norm": 0.7306416034698486,
"learning_rate": 4.531146606395561e-06,
"loss": 1.2588738203048706,
"step": 472
},
{
"epoch": 1.0021141649048626,
"grad_norm": 1.3689743280410767,
"learning_rate": 4.5263288337504755e-06,
"loss": 0.9573943614959717,
"step": 474
},
{
"epoch": 1.0063424947145878,
"grad_norm": 0.6775557994842529,
"learning_rate": 4.521489344771744e-06,
"loss": 1.2035043239593506,
"step": 476
},
{
"epoch": 1.0105708245243128,
"grad_norm": 2.428388833999634,
"learning_rate": 4.516628198624062e-06,
"loss": 0.43922215700149536,
"step": 478
},
{
"epoch": 1.014799154334038,
"grad_norm": 1.7859880924224854,
"learning_rate": 4.511745454736895e-06,
"loss": 0.8049924969673157,
"step": 480
},
{
"epoch": 1.0190274841437632,
"grad_norm": 0.9620187878608704,
"learning_rate": 4.506841172803751e-06,
"loss": 0.7076442241668701,
"step": 482
},
{
"epoch": 1.0232558139534884,
"grad_norm": 0.7065964341163635,
"learning_rate": 4.501915412781443e-06,
"loss": 1.0704156160354614,
"step": 484
},
{
"epoch": 1.0274841437632136,
"grad_norm": 1.055808663368225,
"learning_rate": 4.49696823488937e-06,
"loss": 1.1018344163894653,
"step": 486
},
{
"epoch": 1.0317124735729386,
"grad_norm": 1.521101474761963,
"learning_rate": 4.491999699608768e-06,
"loss": 0.8694652915000916,
"step": 488
},
{
"epoch": 1.0359408033826638,
"grad_norm": 1.305381178855896,
"learning_rate": 4.487009867681976e-06,
"loss": 0.8501845002174377,
"step": 490
},
{
"epoch": 1.040169133192389,
"grad_norm": 2.744489908218384,
"learning_rate": 4.4819988001116935e-06,
"loss": 0.7224630117416382,
"step": 492
},
{
"epoch": 1.0443974630021142,
"grad_norm": 1.4060019254684448,
"learning_rate": 4.476966558160237e-06,
"loss": 1.1804600954055786,
"step": 494
},
{
"epoch": 1.0486257928118394,
"grad_norm": 0.8754310011863708,
"learning_rate": 4.4719132033487845e-06,
"loss": 0.997734010219574,
"step": 496
},
{
"epoch": 1.0528541226215644,
"grad_norm": 0.615909993648529,
"learning_rate": 4.46683879745663e-06,
"loss": 0.8599073886871338,
"step": 498
},
{
"epoch": 1.0570824524312896,
"grad_norm": 0.9889487028121948,
"learning_rate": 4.461743402520423e-06,
"loss": 0.8792165517807007,
"step": 500
},
{
"epoch": 1.0613107822410148,
"grad_norm": 0.7160633206367493,
"learning_rate": 4.456627080833414e-06,
"loss": 1.1756080389022827,
"step": 502
},
{
"epoch": 1.06553911205074,
"grad_norm": 0.8279690742492676,
"learning_rate": 4.451489894944691e-06,
"loss": 1.1408627033233643,
"step": 504
},
{
"epoch": 1.069767441860465,
"grad_norm": 1.4028804302215576,
"learning_rate": 4.446331907658416e-06,
"loss": 0.8269267678260803,
"step": 506
},
{
"epoch": 1.0739957716701902,
"grad_norm": 0.851521909236908,
"learning_rate": 4.441153182033057e-06,
"loss": 0.8282674551010132,
"step": 508
},
{
"epoch": 1.0782241014799154,
"grad_norm": 4.568781852722168,
"learning_rate": 4.435953781380613e-06,
"loss": 0.9908189177513123,
"step": 510
},
{
"epoch": 1.0824524312896406,
"grad_norm": 1.2083165645599365,
"learning_rate": 4.430733769265846e-06,
"loss": 1.0665321350097656,
"step": 512
},
{
"epoch": 1.0866807610993658,
"grad_norm": 0.9603208303451538,
"learning_rate": 4.425493209505503e-06,
"loss": 1.1846468448638916,
"step": 514
},
{
"epoch": 1.0909090909090908,
"grad_norm": 2.0771138668060303,
"learning_rate": 4.420232166167531e-06,
"loss": 0.920912504196167,
"step": 516
},
{
"epoch": 1.095137420718816,
"grad_norm": 0.8328940868377686,
"learning_rate": 4.414950703570299e-06,
"loss": 0.9249328374862671,
"step": 518
},
{
"epoch": 1.0993657505285412,
"grad_norm": 1.3584532737731934,
"learning_rate": 4.40964888628181e-06,
"loss": 0.6281458139419556,
"step": 520
},
{
"epoch": 1.1035940803382664,
"grad_norm": 0.7690941095352173,
"learning_rate": 4.404326779118909e-06,
"loss": 1.4201838970184326,
"step": 522
},
{
"epoch": 1.1078224101479917,
"grad_norm": 1.4306553602218628,
"learning_rate": 4.398984447146496e-06,
"loss": 0.7664209604263306,
"step": 524
},
{
"epoch": 1.1120507399577166,
"grad_norm": 0.9326035976409912,
"learning_rate": 4.393621955676723e-06,
"loss": 1.3979065418243408,
"step": 526
},
{
"epoch": 1.1162790697674418,
"grad_norm": 0.5948351621627808,
"learning_rate": 4.3882393702682046e-06,
"loss": 0.8897819519042969,
"step": 528
},
{
"epoch": 1.120507399577167,
"grad_norm": 18.87567138671875,
"learning_rate": 4.38283675672521e-06,
"loss": 0.4860161244869232,
"step": 530
},
{
"epoch": 1.1247357293868923,
"grad_norm": 0.7055052518844604,
"learning_rate": 4.377414181096859e-06,
"loss": 1.1043274402618408,
"step": 532
},
{
"epoch": 1.1289640591966172,
"grad_norm": 0.9923433065414429,
"learning_rate": 4.371971709676319e-06,
"loss": 0.8963814973831177,
"step": 534
},
{
"epoch": 1.1331923890063424,
"grad_norm": 0.6690739393234253,
"learning_rate": 4.366509408999988e-06,
"loss": 0.8666636347770691,
"step": 536
},
{
"epoch": 1.1374207188160677,
"grad_norm": 1.7911560535430908,
"learning_rate": 4.361027345846687e-06,
"loss": 0.7381163239479065,
"step": 538
},
{
"epoch": 1.1416490486257929,
"grad_norm": 2.775190591812134,
"learning_rate": 4.355525587236841e-06,
"loss": 0.803221583366394,
"step": 540
},
{
"epoch": 1.145877378435518,
"grad_norm": 0.5926763415336609,
"learning_rate": 4.350004200431658e-06,
"loss": 1.1303699016571045,
"step": 542
},
{
"epoch": 1.150105708245243,
"grad_norm": 1.6176501512527466,
"learning_rate": 4.344463252932312e-06,
"loss": 0.7936561107635498,
"step": 544
},
{
"epoch": 1.1543340380549683,
"grad_norm": 0.7994486689567566,
"learning_rate": 4.33890281247911e-06,
"loss": 1.1952998638153076,
"step": 546
},
{
"epoch": 1.1585623678646935,
"grad_norm": 5.03057861328125,
"learning_rate": 4.333322947050673e-06,
"loss": 1.3116034269332886,
"step": 548
},
{
"epoch": 1.1627906976744187,
"grad_norm": 1.1229743957519531,
"learning_rate": 4.3277237248630946e-06,
"loss": 0.8429150581359863,
"step": 550
},
{
"epoch": 1.1670190274841437,
"grad_norm": 1.2708096504211426,
"learning_rate": 4.3221052143691185e-06,
"loss": 1.1472980976104736,
"step": 552
},
{
"epoch": 1.1712473572938689,
"grad_norm": 1.303392767906189,
"learning_rate": 4.316467484257291e-06,
"loss": 1.1732940673828125,
"step": 554
},
{
"epoch": 1.175475687103594,
"grad_norm": 1.490607738494873,
"learning_rate": 4.310810603451128e-06,
"loss": 1.079361915588379,
"step": 556
},
{
"epoch": 1.1797040169133193,
"grad_norm": 0.6570778489112854,
"learning_rate": 4.30513464110827e-06,
"loss": 1.1126495599746704,
"step": 558
},
{
"epoch": 1.1839323467230445,
"grad_norm": 0.64628005027771,
"learning_rate": 4.299439666619637e-06,
"loss": 1.085148811340332,
"step": 560
},
{
"epoch": 1.1881606765327695,
"grad_norm": 0.881676435470581,
"learning_rate": 4.293725749608581e-06,
"loss": 0.8194442987442017,
"step": 562
},
{
"epoch": 1.1923890063424947,
"grad_norm": 1.4025084972381592,
"learning_rate": 4.287992959930033e-06,
"loss": 1.130499005317688,
"step": 564
},
{
"epoch": 1.1966173361522199,
"grad_norm": 1.591407060623169,
"learning_rate": 4.282241367669648e-06,
"loss": 1.0246634483337402,
"step": 566
},
{
"epoch": 1.200845665961945,
"grad_norm": 3.8888139724731445,
"learning_rate": 4.276471043142954e-06,
"loss": 1.2712934017181396,
"step": 568
},
{
"epoch": 1.20507399577167,
"grad_norm": 2.388948678970337,
"learning_rate": 4.270682056894487e-06,
"loss": 1.2416294813156128,
"step": 570
},
{
"epoch": 1.2093023255813953,
"grad_norm": 2.251979351043701,
"learning_rate": 4.264874479696928e-06,
"loss": 1.0589932203292847,
"step": 572
},
{
"epoch": 1.2135306553911205,
"grad_norm": 0.7591463923454285,
"learning_rate": 4.2590483825502425e-06,
"loss": 0.8409648537635803,
"step": 574
},
{
"epoch": 1.2177589852008457,
"grad_norm": 1.53620183467865,
"learning_rate": 4.25320383668081e-06,
"loss": 0.8636283278465271,
"step": 576
},
{
"epoch": 1.221987315010571,
"grad_norm": 1.1595256328582764,
"learning_rate": 4.247340913540548e-06,
"loss": 0.9905154705047607,
"step": 578
},
{
"epoch": 1.226215644820296,
"grad_norm": 1.0831875801086426,
"learning_rate": 4.241459684806052e-06,
"loss": 0.8118501305580139,
"step": 580
},
{
"epoch": 1.230443974630021,
"grad_norm": 0.7525449395179749,
"learning_rate": 4.235560222377703e-06,
"loss": 1.1561369895935059,
"step": 582
},
{
"epoch": 1.2346723044397463,
"grad_norm": 3.329188585281372,
"learning_rate": 4.2296425983788e-06,
"loss": 1.0410387516021729,
"step": 584
},
{
"epoch": 1.2389006342494715,
"grad_norm": 1.3844683170318604,
"learning_rate": 4.223706885154674e-06,
"loss": 0.877763569355011,
"step": 586
},
{
"epoch": 1.2431289640591967,
"grad_norm": 1.171336054801941,
"learning_rate": 4.217753155271804e-06,
"loss": 0.9664973020553589,
"step": 588
},
{
"epoch": 1.2473572938689217,
"grad_norm": 0.8962653279304504,
"learning_rate": 4.21178148151693e-06,
"loss": 0.6594605445861816,
"step": 590
},
{
"epoch": 1.251585623678647,
"grad_norm": 0.4513523578643799,
"learning_rate": 4.2057919368961626e-06,
"loss": 0.9009559154510498,
"step": 592
},
{
"epoch": 1.255813953488372,
"grad_norm": 1.2183146476745605,
"learning_rate": 4.199784594634091e-06,
"loss": 1.0488721132278442,
"step": 594
},
{
"epoch": 1.2600422832980973,
"grad_norm": 0.703557014465332,
"learning_rate": 4.193759528172889e-06,
"loss": 0.760339617729187,
"step": 596
},
{
"epoch": 1.2642706131078225,
"grad_norm": 1.2198814153671265,
"learning_rate": 4.187716811171412e-06,
"loss": 1.1317111253738403,
"step": 598
},
{
"epoch": 1.2684989429175475,
"grad_norm": 1.99573814868927,
"learning_rate": 4.181656517504306e-06,
"loss": 1.3582342863082886,
"step": 600
},
{
"epoch": 1.2727272727272727,
"grad_norm": 0.549845814704895,
"learning_rate": 4.175578721261093e-06,
"loss": 0.9524427056312561,
"step": 602
},
{
"epoch": 1.276955602536998,
"grad_norm": 1.245996117591858,
"learning_rate": 4.169483496745277e-06,
"loss": 1.1659082174301147,
"step": 604
},
{
"epoch": 1.2811839323467231,
"grad_norm": 2.977156162261963,
"learning_rate": 4.163370918473426e-06,
"loss": 0.790830135345459,
"step": 606
},
{
"epoch": 1.285412262156448,
"grad_norm": 0.9513248801231384,
"learning_rate": 4.157241061174261e-06,
"loss": 1.151841640472412,
"step": 608
},
{
"epoch": 1.2896405919661733,
"grad_norm": 0.8377892374992371,
"learning_rate": 4.151093999787755e-06,
"loss": 0.7630675435066223,
"step": 610
},
{
"epoch": 1.2938689217758985,
"grad_norm": 0.74871826171875,
"learning_rate": 4.144929809464202e-06,
"loss": 0.6663084626197815,
"step": 612
},
{
"epoch": 1.2980972515856237,
"grad_norm": 0.7946862578392029,
"learning_rate": 4.138748565563304e-06,
"loss": 0.9356685876846313,
"step": 614
},
{
"epoch": 1.302325581395349,
"grad_norm": 1.464992880821228,
"learning_rate": 4.132550343653251e-06,
"loss": 0.49841123819351196,
"step": 616
},
{
"epoch": 1.306553911205074,
"grad_norm": 2.7764222621917725,
"learning_rate": 4.1263352195097975e-06,
"loss": 0.921845018863678,
"step": 618
},
{
"epoch": 1.3107822410147991,
"grad_norm": 2.9197912216186523,
"learning_rate": 4.120103269115332e-06,
"loss": 1.1502526998519897,
"step": 620
},
{
"epoch": 1.3150105708245243,
"grad_norm": 0.5009030103683472,
"learning_rate": 4.113854568657952e-06,
"loss": 1.1119526624679565,
"step": 622
},
{
"epoch": 1.3192389006342495,
"grad_norm": 4.491978168487549,
"learning_rate": 4.107589194530532e-06,
"loss": 0.7493167519569397,
"step": 624
},
{
"epoch": 1.3234672304439745,
"grad_norm": 0.510420560836792,
"learning_rate": 4.101307223329786e-06,
"loss": 1.1615945100784302,
"step": 626
},
{
"epoch": 1.3276955602536997,
"grad_norm": 1.2960412502288818,
"learning_rate": 4.0950087318553375e-06,
"loss": 1.3132972717285156,
"step": 628
},
{
"epoch": 1.331923890063425,
"grad_norm": 0.8799285292625427,
"learning_rate": 4.088693797108774e-06,
"loss": 1.0321528911590576,
"step": 630
},
{
"epoch": 1.3361522198731501,
"grad_norm": 1.2005505561828613,
"learning_rate": 4.0823624962927104e-06,
"loss": 0.616770327091217,
"step": 632
},
{
"epoch": 1.3403805496828753,
"grad_norm": 0.6413878798484802,
"learning_rate": 4.076014906809842e-06,
"loss": 0.747455358505249,
"step": 634
},
{
"epoch": 1.3446088794926006,
"grad_norm": 0.6914223432540894,
"learning_rate": 4.069651106262003e-06,
"loss": 0.8139711022377014,
"step": 636
},
{
"epoch": 1.3488372093023255,
"grad_norm": 3.342055082321167,
"learning_rate": 4.063271172449209e-06,
"loss": 1.0335206985473633,
"step": 638
},
{
"epoch": 1.3530655391120507,
"grad_norm": 2.376635789871216,
"learning_rate": 4.0568751833687155e-06,
"loss": 0.7637988328933716,
"step": 640
},
{
"epoch": 1.357293868921776,
"grad_norm": 0.9393727779388428,
"learning_rate": 4.050463217214058e-06,
"loss": 1.218309760093689,
"step": 642
},
{
"epoch": 1.361522198731501,
"grad_norm": 4.736209869384766,
"learning_rate": 4.0440353523741e-06,
"loss": 1.1682794094085693,
"step": 644
},
{
"epoch": 1.3657505285412261,
"grad_norm": 1.6625150442123413,
"learning_rate": 4.0375916674320694e-06,
"loss": 0.7112323045730591,
"step": 646
},
{
"epoch": 1.3699788583509513,
"grad_norm": 1.635366678237915,
"learning_rate": 4.0311322411646045e-06,
"loss": 0.7137230634689331,
"step": 648
},
{
"epoch": 1.3742071881606766,
"grad_norm": 1.2800323963165283,
"learning_rate": 4.0246571525407875e-06,
"loss": 0.7801585793495178,
"step": 650
},
{
"epoch": 1.3784355179704018,
"grad_norm": 1.4994843006134033,
"learning_rate": 4.018166480721178e-06,
"loss": 0.7897611856460571,
"step": 652
},
{
"epoch": 1.382663847780127,
"grad_norm": 0.7120780348777771,
"learning_rate": 4.011660305056846e-06,
"loss": 1.1767425537109375,
"step": 654
},
{
"epoch": 1.386892177589852,
"grad_norm": 0.7388160228729248,
"learning_rate": 4.005138705088401e-06,
"loss": 1.0873156785964966,
"step": 656
},
{
"epoch": 1.3911205073995772,
"grad_norm": 1.0489729642868042,
"learning_rate": 3.9986017605450265e-06,
"loss": 0.8503063321113586,
"step": 658
},
{
"epoch": 1.3953488372093024,
"grad_norm": 0.8119449019432068,
"learning_rate": 3.992049551343493e-06,
"loss": 0.9161325097084045,
"step": 660
},
{
"epoch": 1.3995771670190273,
"grad_norm": 0.5929046869277954,
"learning_rate": 3.985482157587192e-06,
"loss": 1.1369270086288452,
"step": 662
},
{
"epoch": 1.4038054968287526,
"grad_norm": 0.3672987222671509,
"learning_rate": 3.97889965956515e-06,
"loss": 1.0164310932159424,
"step": 664
},
{
"epoch": 1.4080338266384778,
"grad_norm": 1.0386170148849487,
"learning_rate": 3.972302137751051e-06,
"loss": 1.374223232269287,
"step": 666
},
{
"epoch": 1.412262156448203,
"grad_norm": 1.0722689628601074,
"learning_rate": 3.9656896728022476e-06,
"loss": 1.12968111038208,
"step": 668
},
{
"epoch": 1.4164904862579282,
"grad_norm": 1.196387529373169,
"learning_rate": 3.959062345558782e-06,
"loss": 0.771783173084259,
"step": 670
},
{
"epoch": 1.4207188160676534,
"grad_norm": 1.3007256984710693,
"learning_rate": 3.9524202370423915e-06,
"loss": 1.1213726997375488,
"step": 672
},
{
"epoch": 1.4249471458773784,
"grad_norm": 0.9761534929275513,
"learning_rate": 3.945763428455523e-06,
"loss": 0.6354954242706299,
"step": 674
},
{
"epoch": 1.4291754756871036,
"grad_norm": 1.2106300592422485,
"learning_rate": 3.939092001180332e-06,
"loss": 0.8169525861740112,
"step": 676
},
{
"epoch": 1.4334038054968288,
"grad_norm": 0.6862068176269531,
"learning_rate": 3.932406036777701e-06,
"loss": 1.3615213632583618,
"step": 678
},
{
"epoch": 1.437632135306554,
"grad_norm": 1.1061359643936157,
"learning_rate": 3.9257056169862305e-06,
"loss": 1.1848570108413696,
"step": 680
},
{
"epoch": 1.441860465116279,
"grad_norm": 1.6158320903778076,
"learning_rate": 3.918990823721243e-06,
"loss": 1.1745814085006714,
"step": 682
},
{
"epoch": 1.4460887949260042,
"grad_norm": 0.6842957735061646,
"learning_rate": 3.912261739073785e-06,
"loss": 1.106062650680542,
"step": 684
},
{
"epoch": 1.4503171247357294,
"grad_norm": 1.5938684940338135,
"learning_rate": 3.905518445309619e-06,
"loss": 1.4594074487686157,
"step": 686
},
{
"epoch": 1.4545454545454546,
"grad_norm": 0.7108921408653259,
"learning_rate": 3.8987610248682205e-06,
"loss": 1.0741581916809082,
"step": 688
},
{
"epoch": 1.4587737843551798,
"grad_norm": 7.655938148498535,
"learning_rate": 3.89198956036177e-06,
"loss": 0.41335436701774597,
"step": 690
},
{
"epoch": 1.463002114164905,
"grad_norm": 0.6948283910751343,
"learning_rate": 3.885204134574141e-06,
"loss": 1.146783709526062,
"step": 692
},
{
"epoch": 1.46723044397463,
"grad_norm": 0.6634160876274109,
"learning_rate": 3.878404830459889e-06,
"loss": 0.65525221824646,
"step": 694
},
{
"epoch": 1.4714587737843552,
"grad_norm": 0.9858572483062744,
"learning_rate": 3.87159173114324e-06,
"loss": 1.1054624319076538,
"step": 696
},
{
"epoch": 1.4756871035940804,
"grad_norm": 1.3330109119415283,
"learning_rate": 3.86476491991707e-06,
"loss": 1.1466596126556396,
"step": 698
},
{
"epoch": 1.4799154334038054,
"grad_norm": 3.4319090843200684,
"learning_rate": 3.857924480241888e-06,
"loss": 0.9684445261955261,
"step": 700
},
{
"epoch": 1.4841437632135306,
"grad_norm": 0.5792906880378723,
"learning_rate": 3.851070495744819e-06,
"loss": 1.1101263761520386,
"step": 702
},
{
"epoch": 1.4883720930232558,
"grad_norm": 0.584158718585968,
"learning_rate": 3.8442030502185745e-06,
"loss": 1.0356827974319458,
"step": 704
},
{
"epoch": 1.492600422832981,
"grad_norm": 0.7270916700363159,
"learning_rate": 3.837322227620439e-06,
"loss": 0.8322772979736328,
"step": 706
},
{
"epoch": 1.4968287526427062,
"grad_norm": 0.28889569640159607,
"learning_rate": 3.830428112071228e-06,
"loss": 0.2769829332828522,
"step": 708
},
{
"epoch": 1.5010570824524314,
"grad_norm": 0.7377986907958984,
"learning_rate": 3.823520787854278e-06,
"loss": 0.6088220477104187,
"step": 710
},
{
"epoch": 1.5052854122621564,
"grad_norm": 2.107346296310425,
"learning_rate": 3.816600339414402e-06,
"loss": 0.5735040903091431,
"step": 712
},
{
"epoch": 1.5095137420718816,
"grad_norm": 0.6663100719451904,
"learning_rate": 3.8096668513568608e-06,
"loss": 0.9799573421478271,
"step": 714
},
{
"epoch": 1.5137420718816068,
"grad_norm": 0.7188597917556763,
"learning_rate": 3.8027204084463334e-06,
"loss": 1.1207448244094849,
"step": 716
},
{
"epoch": 1.5179704016913318,
"grad_norm": 0.694125771522522,
"learning_rate": 3.795761095605873e-06,
"loss": 1.0090175867080688,
"step": 718
},
{
"epoch": 1.522198731501057,
"grad_norm": 0.3084549903869629,
"learning_rate": 3.7887889979158775e-06,
"loss": 0.9819098711013794,
"step": 720
},
{
"epoch": 1.5264270613107822,
"grad_norm": 1.8949941396713257,
"learning_rate": 3.7818042006130405e-06,
"loss": 0.8384270071983337,
"step": 722
},
{
"epoch": 1.5306553911205074,
"grad_norm": 1.5150532722473145,
"learning_rate": 3.774806789089316e-06,
"loss": 0.9709129929542542,
"step": 724
},
{
"epoch": 1.5348837209302326,
"grad_norm": 1.0952752828598022,
"learning_rate": 3.7677968488908705e-06,
"loss": 0.9372836947441101,
"step": 726
},
{
"epoch": 1.5391120507399578,
"grad_norm": 1.564868450164795,
"learning_rate": 3.76077446571704e-06,
"loss": 0.6753690242767334,
"step": 728
},
{
"epoch": 1.543340380549683,
"grad_norm": 1.170804500579834,
"learning_rate": 3.75373972541928e-06,
"loss": 0.8191190361976624,
"step": 730
},
{
"epoch": 1.547568710359408,
"grad_norm": 0.679467499256134,
"learning_rate": 3.746692714000117e-06,
"loss": 1.086642861366272,
"step": 732
},
{
"epoch": 1.5517970401691332,
"grad_norm": 0.2902541756629944,
"learning_rate": 3.7396335176120953e-06,
"loss": 0.25046733021736145,
"step": 734
},
{
"epoch": 1.5560253699788582,
"grad_norm": 2.038381576538086,
"learning_rate": 3.7325622225567294e-06,
"loss": 1.009968876838684,
"step": 736
},
{
"epoch": 1.5602536997885834,
"grad_norm": 0.2496039867401123,
"learning_rate": 3.725478915283439e-06,
"loss": 0.84336918592453,
"step": 738
},
{
"epoch": 1.5644820295983086,
"grad_norm": 0.559074878692627,
"learning_rate": 3.7183836823885045e-06,
"loss": 1.1601533889770508,
"step": 740
},
{
"epoch": 1.5687103594080338,
"grad_norm": 1.2242622375488281,
"learning_rate": 3.7112766106139964e-06,
"loss": 0.8150052428245544,
"step": 742
},
{
"epoch": 1.572938689217759,
"grad_norm": 1.0551347732543945,
"learning_rate": 3.7041577868467242e-06,
"loss": 1.1540948152542114,
"step": 744
},
{
"epoch": 1.5771670190274842,
"grad_norm": 2.7716071605682373,
"learning_rate": 3.697027298117168e-06,
"loss": 1.1788626909255981,
"step": 746
},
{
"epoch": 1.5813953488372094,
"grad_norm": 1.1499396562576294,
"learning_rate": 3.6898852315984156e-06,
"loss": 1.057762861251831,
"step": 748
},
{
"epoch": 1.5856236786469344,
"grad_norm": 0.3814210295677185,
"learning_rate": 3.6827316746051015e-06,
"loss": 0.04337337985634804,
"step": 750
},
{
"epoch": 1.5898520084566596,
"grad_norm": 1.3480174541473389,
"learning_rate": 3.675566714592333e-06,
"loss": 0.9101552367210388,
"step": 752
},
{
"epoch": 1.5940803382663846,
"grad_norm": 1.1889445781707764,
"learning_rate": 3.6683904391546255e-06,
"loss": 1.2129230499267578,
"step": 754
},
{
"epoch": 1.5983086680761098,
"grad_norm": 0.5748162269592285,
"learning_rate": 3.6612029360248285e-06,
"loss": 1.1286925077438354,
"step": 756
},
{
"epoch": 1.602536997885835,
"grad_norm": 0.724022626876831,
"learning_rate": 3.6540042930730556e-06,
"loss": 1.1947628259658813,
"step": 758
},
{
"epoch": 1.6067653276955602,
"grad_norm": 0.677099347114563,
"learning_rate": 3.6467945983056104e-06,
"loss": 1.1410974264144897,
"step": 760
},
{
"epoch": 1.6109936575052854,
"grad_norm": 0.6079980731010437,
"learning_rate": 3.6395739398639057e-06,
"loss": 1.1570736169815063,
"step": 762
},
{
"epoch": 1.6152219873150107,
"grad_norm": 0.9599738121032715,
"learning_rate": 3.6323424060233936e-06,
"loss": 1.035282015800476,
"step": 764
},
{
"epoch": 1.6194503171247359,
"grad_norm": 1.0322376489639282,
"learning_rate": 3.6251000851924806e-06,
"loss": 0.8392003774642944,
"step": 766
},
{
"epoch": 1.6236786469344608,
"grad_norm": 0.708662211894989,
"learning_rate": 3.617847065911447e-06,
"loss": 1.1536966562271118,
"step": 768
},
{
"epoch": 1.627906976744186,
"grad_norm": 1.8593244552612305,
"learning_rate": 3.610583436851369e-06,
"loss": 1.0729390382766724,
"step": 770
},
{
"epoch": 1.6321353065539113,
"grad_norm": 0.5333645343780518,
"learning_rate": 3.603309286813029e-06,
"loss": 1.1488738059997559,
"step": 772
},
{
"epoch": 1.6363636363636362,
"grad_norm": 1.6851012706756592,
"learning_rate": 3.596024704725835e-06,
"loss": 0.9281710386276245,
"step": 774
},
{
"epoch": 1.6405919661733614,
"grad_norm": 1.7228329181671143,
"learning_rate": 3.588729779646728e-06,
"loss": 1.158841609954834,
"step": 776
},
{
"epoch": 1.6448202959830867,
"grad_norm": 0.9394569396972656,
"learning_rate": 3.581424600759099e-06,
"loss": 0.7341264486312866,
"step": 778
},
{
"epoch": 1.6490486257928119,
"grad_norm": 0.6430965065956116,
"learning_rate": 3.5741092573716952e-06,
"loss": 1.096555233001709,
"step": 780
},
{
"epoch": 1.653276955602537,
"grad_norm": 1.5148671865463257,
"learning_rate": 3.5667838389175276e-06,
"loss": 0.8284240961074829,
"step": 782
},
{
"epoch": 1.6575052854122623,
"grad_norm": 0.6028370261192322,
"learning_rate": 3.55944843495278e-06,
"loss": 1.1990805864334106,
"step": 784
},
{
"epoch": 1.6617336152219875,
"grad_norm": 2.5651183128356934,
"learning_rate": 3.5521031351557116e-06,
"loss": 0.4815433621406555,
"step": 786
},
{
"epoch": 1.6659619450317125,
"grad_norm": 0.8050721287727356,
"learning_rate": 3.5447480293255666e-06,
"loss": 1.1529608964920044,
"step": 788
},
{
"epoch": 1.6701902748414377,
"grad_norm": 0.9118593335151672,
"learning_rate": 3.5373832073814668e-06,
"loss": 0.7648034691810608,
"step": 790
},
{
"epoch": 1.6744186046511627,
"grad_norm": 0.8314517736434937,
"learning_rate": 3.5300087593613186e-06,
"loss": 0.5136529207229614,
"step": 792
},
{
"epoch": 1.6786469344608879,
"grad_norm": 0.7918019890785217,
"learning_rate": 3.5226247754207138e-06,
"loss": 1.1230441331863403,
"step": 794
},
{
"epoch": 1.682875264270613,
"grad_norm": 0.4042631685733795,
"learning_rate": 3.5152313458318206e-06,
"loss": 0.6846147775650024,
"step": 796
},
{
"epoch": 1.6871035940803383,
"grad_norm": 1.0725696086883545,
"learning_rate": 3.5078285609822875e-06,
"loss": 1.2035937309265137,
"step": 798
},
{
"epoch": 1.6913319238900635,
"grad_norm": 0.5610724687576294,
"learning_rate": 3.5004165113741334e-06,
"loss": 1.1461760997772217,
"step": 800
},
{
"epoch": 1.6955602536997887,
"grad_norm": 1.0127768516540527,
"learning_rate": 3.4929952876226414e-06,
"loss": 0.6147741675376892,
"step": 802
},
{
"epoch": 1.699788583509514,
"grad_norm": 0.6945735216140747,
"learning_rate": 3.485564980455255e-06,
"loss": 1.1363788843154907,
"step": 804
},
{
"epoch": 1.7040169133192389,
"grad_norm": 1.351635217666626,
"learning_rate": 3.478125680710463e-06,
"loss": 0.8326917886734009,
"step": 806
},
{
"epoch": 1.708245243128964,
"grad_norm": 1.1634646654129028,
"learning_rate": 3.470677479336695e-06,
"loss": 0.7223104238510132,
"step": 808
},
{
"epoch": 1.712473572938689,
"grad_norm": 0.9786092042922974,
"learning_rate": 3.4632204673912034e-06,
"loss": 1.1191296577453613,
"step": 810
},
{
"epoch": 1.7167019027484143,
"grad_norm": 2.46586275100708,
"learning_rate": 3.4557547360389577e-06,
"loss": 1.3536570072174072,
"step": 812
},
{
"epoch": 1.7209302325581395,
"grad_norm": 0.8146648406982422,
"learning_rate": 3.4482803765515206e-06,
"loss": 1.100825309753418,
"step": 814
},
{
"epoch": 1.7251585623678647,
"grad_norm": 0.8478085994720459,
"learning_rate": 3.4407974803059406e-06,
"loss": 1.1602932214736938,
"step": 816
},
{
"epoch": 1.72938689217759,
"grad_norm": 0.9965582489967346,
"learning_rate": 3.4333061387836307e-06,
"loss": 0.9386340379714966,
"step": 818
},
{
"epoch": 1.733615221987315,
"grad_norm": 2.556925058364868,
"learning_rate": 3.4258064435692507e-06,
"loss": 1.0207256078720093,
"step": 820
},
{
"epoch": 1.7378435517970403,
"grad_norm": 1.3679172992706299,
"learning_rate": 3.4182984863495876e-06,
"loss": 0.6849140524864197,
"step": 822
},
{
"epoch": 1.7420718816067653,
"grad_norm": 0.4469180405139923,
"learning_rate": 3.410782358912435e-06,
"loss": 0.8242835998535156,
"step": 824
},
{
"epoch": 1.7463002114164905,
"grad_norm": 1.4416385889053345,
"learning_rate": 3.403258153145471e-06,
"loss": 0.9483500719070435,
"step": 826
},
{
"epoch": 1.7505285412262155,
"grad_norm": 0.6498605608940125,
"learning_rate": 3.3957259610351324e-06,
"loss": 0.9845226407051086,
"step": 828
},
{
"epoch": 1.7547568710359407,
"grad_norm": 2.385218620300293,
"learning_rate": 3.388185874665495e-06,
"loss": 0.8091049790382385,
"step": 830
},
{
"epoch": 1.758985200845666,
"grad_norm": 0.9289647936820984,
"learning_rate": 3.3806379862171448e-06,
"loss": 1.1820333003997803,
"step": 832
},
{
"epoch": 1.763213530655391,
"grad_norm": 1.0489366054534912,
"learning_rate": 3.373082387966048e-06,
"loss": 0.833751916885376,
"step": 834
},
{
"epoch": 1.7674418604651163,
"grad_norm": 0.571071982383728,
"learning_rate": 3.365519172282431e-06,
"loss": 0.8406846523284912,
"step": 836
},
{
"epoch": 1.7716701902748415,
"grad_norm": 0.5149204730987549,
"learning_rate": 3.357948431629643e-06,
"loss": 1.1610711812973022,
"step": 838
},
{
"epoch": 1.7758985200845667,
"grad_norm": 0.5685960054397583,
"learning_rate": 3.3503702585630305e-06,
"loss": 0.8929948806762695,
"step": 840
},
{
"epoch": 1.7801268498942917,
"grad_norm": 2.4608681201934814,
"learning_rate": 3.342784745728804e-06,
"loss": 0.4209887683391571,
"step": 842
},
{
"epoch": 1.784355179704017,
"grad_norm": 0.6386370062828064,
"learning_rate": 3.3351919858629045e-06,
"loss": 0.7464441061019897,
"step": 844
},
{
"epoch": 1.7885835095137421,
"grad_norm": 0.8585013747215271,
"learning_rate": 3.327592071789873e-06,
"loss": 0.9707925319671631,
"step": 846
},
{
"epoch": 1.792811839323467,
"grad_norm": 1.5596438646316528,
"learning_rate": 3.3199850964217116e-06,
"loss": 0.7446164488792419,
"step": 848
},
{
"epoch": 1.7970401691331923,
"grad_norm": 2.6806228160858154,
"learning_rate": 3.312371152756751e-06,
"loss": 0.7679558396339417,
"step": 850
},
{
"epoch": 1.8012684989429175,
"grad_norm": 0.7470750212669373,
"learning_rate": 3.304750333878511e-06,
"loss": 1.0020787715911865,
"step": 852
},
{
"epoch": 1.8054968287526427,
"grad_norm": 0.5787929892539978,
"learning_rate": 3.2971227329545634e-06,
"loss": 0.8919803500175476,
"step": 854
},
{
"epoch": 1.809725158562368,
"grad_norm": 0.50643390417099,
"learning_rate": 3.2894884432353957e-06,
"loss": 1.0985815525054932,
"step": 856
},
{
"epoch": 1.8139534883720931,
"grad_norm": 1.5751054286956787,
"learning_rate": 3.281847558053265e-06,
"loss": 0.6541829109191895,
"step": 858
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.6766167283058167,
"learning_rate": 3.274200170821064e-06,
"loss": 1.0379619598388672,
"step": 860
},
{
"epoch": 1.8224101479915433,
"grad_norm": 1.3516942262649536,
"learning_rate": 3.2665463750311727e-06,
"loss": 1.1044809818267822,
"step": 862
},
{
"epoch": 1.8266384778012685,
"grad_norm": 1.573708415031433,
"learning_rate": 3.2588862642543208e-06,
"loss": 0.5707927942276001,
"step": 864
},
{
"epoch": 1.8308668076109935,
"grad_norm": 0.4704338312149048,
"learning_rate": 3.2512199321384393e-06,
"loss": 0.7981724143028259,
"step": 866
},
{
"epoch": 1.8350951374207187,
"grad_norm": 0.5769053101539612,
"learning_rate": 3.243547472407518e-06,
"loss": 1.1399530172348022,
"step": 868
},
{
"epoch": 1.839323467230444,
"grad_norm": 0.8416613340377808,
"learning_rate": 3.23586897886046e-06,
"loss": 1.1359026432037354,
"step": 870
},
{
"epoch": 1.8435517970401691,
"grad_norm": 1.3577508926391602,
"learning_rate": 3.2281845453699345e-06,
"loss": 0.8569067716598511,
"step": 872
},
{
"epoch": 1.8477801268498943,
"grad_norm": 2.056459665298462,
"learning_rate": 3.220494265881227e-06,
"loss": 1.1351348161697388,
"step": 874
},
{
"epoch": 1.8520084566596196,
"grad_norm": 2.2590129375457764,
"learning_rate": 3.212798234411095e-06,
"loss": 0.9369499087333679,
"step": 876
},
{
"epoch": 1.8562367864693448,
"grad_norm": 1.6008362770080566,
"learning_rate": 3.2050965450466136e-06,
"loss": 0.2906026244163513,
"step": 878
},
{
"epoch": 1.8604651162790697,
"grad_norm": 1.1616226434707642,
"learning_rate": 3.197389291944032e-06,
"loss": 0.7301267385482788,
"step": 880
},
{
"epoch": 1.864693446088795,
"grad_norm": 0.7197896242141724,
"learning_rate": 3.1896765693276135e-06,
"loss": 1.1232812404632568,
"step": 882
},
{
"epoch": 1.86892177589852,
"grad_norm": 0.6383930444717407,
"learning_rate": 3.1819584714884903e-06,
"loss": 0.7655252814292908,
"step": 884
},
{
"epoch": 1.8731501057082451,
"grad_norm": 1.0251339673995972,
"learning_rate": 3.1742350927835125e-06,
"loss": 1.121950387954712,
"step": 886
},
{
"epoch": 1.8773784355179703,
"grad_norm": 0.6124594807624817,
"learning_rate": 3.1665065276340844e-06,
"loss": 1.1401907205581665,
"step": 888
},
{
"epoch": 1.8816067653276956,
"grad_norm": 1.397496223449707,
"learning_rate": 3.158772870525022e-06,
"loss": 0.6092522144317627,
"step": 890
},
{
"epoch": 1.8858350951374208,
"grad_norm": 2.1092441082000732,
"learning_rate": 3.1510342160033903e-06,
"loss": 0.8399344086647034,
"step": 892
},
{
"epoch": 1.890063424947146,
"grad_norm": 1.1821517944335938,
"learning_rate": 3.1432906586773488e-06,
"loss": 1.114659070968628,
"step": 894
},
{
"epoch": 1.8942917547568712,
"grad_norm": 1.5643501281738281,
"learning_rate": 3.135542293214997e-06,
"loss": 1.410881519317627,
"step": 896
},
{
"epoch": 1.8985200845665962,
"grad_norm": 0.7187080383300781,
"learning_rate": 3.1277892143432165e-06,
"loss": 1.065239429473877,
"step": 898
},
{
"epoch": 1.9027484143763214,
"grad_norm": 1.0004364252090454,
"learning_rate": 3.1200315168465113e-06,
"loss": 0.5023792386054993,
"step": 900
},
{
"epoch": 1.9069767441860463,
"grad_norm": 0.5592870116233826,
"learning_rate": 3.1122692955658497e-06,
"loss": 1.107616901397705,
"step": 902
},
{
"epoch": 1.9112050739957716,
"grad_norm": 1.719496726989746,
"learning_rate": 3.1045026453975048e-06,
"loss": 0.5772966146469116,
"step": 904
},
{
"epoch": 1.9154334038054968,
"grad_norm": 1.4579967260360718,
"learning_rate": 3.096731661291896e-06,
"loss": 0.8818938136100769,
"step": 906
},
{
"epoch": 1.919661733615222,
"grad_norm": 0.8083340525627136,
"learning_rate": 3.0889564382524257e-06,
"loss": 1.2467647790908813,
"step": 908
},
{
"epoch": 1.9238900634249472,
"grad_norm": 0.5722190737724304,
"learning_rate": 3.08117707133432e-06,
"loss": 0.812706708908081,
"step": 910
},
{
"epoch": 1.9281183932346724,
"grad_norm": 0.6764684319496155,
"learning_rate": 3.0733936556434634e-06,
"loss": 1.1728202104568481,
"step": 912
},
{
"epoch": 1.9323467230443976,
"grad_norm": 0.9146943688392639,
"learning_rate": 3.0656062863352413e-06,
"loss": 0.7626368999481201,
"step": 914
},
{
"epoch": 1.9365750528541226,
"grad_norm": 0.5261140465736389,
"learning_rate": 3.0578150586133704e-06,
"loss": 1.1478456258773804,
"step": 916
},
{
"epoch": 1.9408033826638478,
"grad_norm": 0.6775986552238464,
"learning_rate": 3.0500200677287428e-06,
"loss": 0.6973150968551636,
"step": 918
},
{
"epoch": 1.945031712473573,
"grad_norm": 1.3343801498413086,
"learning_rate": 3.042221408978251e-06,
"loss": 0.9482506513595581,
"step": 920
},
{
"epoch": 1.949260042283298,
"grad_norm": 1.5522212982177734,
"learning_rate": 3.0344191777036312e-06,
"loss": 0.9986613392829895,
"step": 922
},
{
"epoch": 1.9534883720930232,
"grad_norm": 0.4814535677433014,
"learning_rate": 3.026613469290298e-06,
"loss": 1.0413583517074585,
"step": 924
},
{
"epoch": 1.9577167019027484,
"grad_norm": 0.3166026473045349,
"learning_rate": 3.01880437916617e-06,
"loss": 0.9614431262016296,
"step": 926
},
{
"epoch": 1.9619450317124736,
"grad_norm": 1.094480037689209,
"learning_rate": 3.0109920028005135e-06,
"loss": 1.2636445760726929,
"step": 928
},
{
"epoch": 1.9661733615221988,
"grad_norm": 0.7118551135063171,
"learning_rate": 3.003176435702767e-06,
"loss": 0.9028820395469666,
"step": 930
},
{
"epoch": 1.970401691331924,
"grad_norm": 0.5945522785186768,
"learning_rate": 2.9953577734213775e-06,
"loss": 1.2327357530593872,
"step": 932
},
{
"epoch": 1.9746300211416492,
"grad_norm": 0.7517629265785217,
"learning_rate": 2.9875361115426347e-06,
"loss": 0.8936224579811096,
"step": 934
},
{
"epoch": 1.9788583509513742,
"grad_norm": 0.9688192009925842,
"learning_rate": 2.979711545689496e-06,
"loss": 0.7812487483024597,
"step": 936
},
{
"epoch": 1.9830866807610994,
"grad_norm": 8.303119659423828,
"learning_rate": 2.9718841715204227e-06,
"loss": 0.873395562171936,
"step": 938
},
{
"epoch": 1.9873150105708244,
"grad_norm": 0.6607802510261536,
"learning_rate": 2.9640540847282095e-06,
"loss": 1.0979681015014648,
"step": 940
},
{
"epoch": 1.9915433403805496,
"grad_norm": 0.6019576787948608,
"learning_rate": 2.956221381038812e-06,
"loss": 1.1199960708618164,
"step": 942
},
{
"epoch": 1.9957716701902748,
"grad_norm": 0.7929393649101257,
"learning_rate": 2.94838615621018e-06,
"loss": 1.1161065101623535,
"step": 944
},
{
"epoch": 2.0,
"grad_norm": 0.6814373135566711,
"learning_rate": 2.9405485060310857e-06,
"loss": 0.48783794045448303,
"step": 946
},
{
"epoch": 2.004228329809725,
"grad_norm": 1.4736313819885254,
"learning_rate": 2.9327085263199507e-06,
"loss": 0.7957913279533386,
"step": 948
},
{
"epoch": 2.0084566596194504,
"grad_norm": 0.4455970823764801,
"learning_rate": 2.924866312923677e-06,
"loss": 1.0547270774841309,
"step": 950
},
{
"epoch": 2.0126849894291756,
"grad_norm": 0.773058295249939,
"learning_rate": 2.9170219617164735e-06,
"loss": 1.0442657470703125,
"step": 952
},
{
"epoch": 2.016913319238901,
"grad_norm": 0.9597894549369812,
"learning_rate": 2.9091755685986866e-06,
"loss": 1.1685289144515991,
"step": 954
},
{
"epoch": 2.0211416490486256,
"grad_norm": 0.6969325542449951,
"learning_rate": 2.9013272294956223e-06,
"loss": 1.1930384635925293,
"step": 956
},
{
"epoch": 2.025369978858351,
"grad_norm": 0.8082700967788696,
"learning_rate": 2.8934770403563815e-06,
"loss": 0.776046872138977,
"step": 958
},
{
"epoch": 2.029598308668076,
"grad_norm": 0.7422521710395813,
"learning_rate": 2.8856250971526788e-06,
"loss": 1.0249298810958862,
"step": 960
},
{
"epoch": 2.033826638477801,
"grad_norm": 1.6249040365219116,
"learning_rate": 2.877771495877676e-06,
"loss": 0.9289775490760803,
"step": 962
},
{
"epoch": 2.0380549682875264,
"grad_norm": 3.067833185195923,
"learning_rate": 2.869916332544802e-06,
"loss": 0.8100100159645081,
"step": 964
},
{
"epoch": 2.0422832980972516,
"grad_norm": 0.724915087223053,
"learning_rate": 2.8620597031865854e-06,
"loss": 0.7401767373085022,
"step": 966
},
{
"epoch": 2.046511627906977,
"grad_norm": 2.0869836807250977,
"learning_rate": 2.854201703853477e-06,
"loss": 0.8137513399124146,
"step": 968
},
{
"epoch": 2.050739957716702,
"grad_norm": 0.6877044439315796,
"learning_rate": 2.8463424306126743e-06,
"loss": 1.10543692111969,
"step": 970
},
{
"epoch": 2.0549682875264272,
"grad_norm": 1.3014296293258667,
"learning_rate": 2.838481979546952e-06,
"loss": 0.5617172122001648,
"step": 972
},
{
"epoch": 2.059196617336152,
"grad_norm": 0.9769271016120911,
"learning_rate": 2.83062044675348e-06,
"loss": 1.060500144958496,
"step": 974
},
{
"epoch": 2.063424947145877,
"grad_norm": 2.4497523307800293,
"learning_rate": 2.822757928342658e-06,
"loss": 1.075200080871582,
"step": 976
},
{
"epoch": 2.0676532769556024,
"grad_norm": 0.8020917177200317,
"learning_rate": 2.814894520436931e-06,
"loss": 1.0989971160888672,
"step": 978
},
{
"epoch": 2.0718816067653276,
"grad_norm": 1.6352614164352417,
"learning_rate": 2.807030319169619e-06,
"loss": 0.699384868144989,
"step": 980
},
{
"epoch": 2.076109936575053,
"grad_norm": 6.557322978973389,
"learning_rate": 2.7991654206837434e-06,
"loss": 0.7373824119567871,
"step": 982
},
{
"epoch": 2.080338266384778,
"grad_norm": 0.7050887942314148,
"learning_rate": 2.7912999211308466e-06,
"loss": 0.8136764168739319,
"step": 984
},
{
"epoch": 2.0845665961945032,
"grad_norm": 1.3208609819412231,
"learning_rate": 2.783433916669822e-06,
"loss": 0.9552209973335266,
"step": 986
},
{
"epoch": 2.0887949260042284,
"grad_norm": 0.6587861180305481,
"learning_rate": 2.7755675034657336e-06,
"loss": 1.0741578340530396,
"step": 988
},
{
"epoch": 2.0930232558139537,
"grad_norm": 1.1716125011444092,
"learning_rate": 2.7677007776886437e-06,
"loss": 1.0747499465942383,
"step": 990
},
{
"epoch": 2.097251585623679,
"grad_norm": 1.5702075958251953,
"learning_rate": 2.759833835512435e-06,
"loss": 0.670864999294281,
"step": 992
},
{
"epoch": 2.1014799154334036,
"grad_norm": 22.38727569580078,
"learning_rate": 2.7519667731136364e-06,
"loss": 0.7279332280158997,
"step": 994
},
{
"epoch": 2.105708245243129,
"grad_norm": 0.8443185091018677,
"learning_rate": 2.7440996866702458e-06,
"loss": 0.8103309869766235,
"step": 996
},
{
"epoch": 2.109936575052854,
"grad_norm": 0.8229217529296875,
"learning_rate": 2.7362326723605566e-06,
"loss": 1.036565899848938,
"step": 998
},
{
"epoch": 2.1141649048625792,
"grad_norm": 0.7176088094711304,
"learning_rate": 2.7283658263619794e-06,
"loss": 1.0687159299850464,
"step": 1000
},
{
"epoch": 2.1183932346723044,
"grad_norm": 0.6158708333969116,
"learning_rate": 2.7204992448498657e-06,
"loss": 0.24933312833309174,
"step": 1002
},
{
"epoch": 2.1226215644820297,
"grad_norm": 1.7368133068084717,
"learning_rate": 2.712633023996336e-06,
"loss": 0.7682783007621765,
"step": 1004
},
{
"epoch": 2.126849894291755,
"grad_norm": 0.6421481966972351,
"learning_rate": 2.7047672599691e-06,
"loss": 1.0240600109100342,
"step": 1006
},
{
"epoch": 2.13107822410148,
"grad_norm": 1.3722180128097534,
"learning_rate": 2.696902048930284e-06,
"loss": 0.9667700529098511,
"step": 1008
},
{
"epoch": 2.1353065539112053,
"grad_norm": 0.6478885412216187,
"learning_rate": 2.6890374870352532e-06,
"loss": 0.8398556113243103,
"step": 1010
},
{
"epoch": 2.13953488372093,
"grad_norm": 1.5403518676757812,
"learning_rate": 2.6811736704314344e-06,
"loss": 0.57329922914505,
"step": 1012
},
{
"epoch": 2.1437632135306552,
"grad_norm": 0.8799501061439514,
"learning_rate": 2.6733106952571467e-06,
"loss": 0.6521193981170654,
"step": 1014
},
{
"epoch": 2.1479915433403804,
"grad_norm": 0.9985294938087463,
"learning_rate": 2.6654486576404197e-06,
"loss": 0.8588607311248779,
"step": 1016
},
{
"epoch": 2.1522198731501057,
"grad_norm": 0.9864040017127991,
"learning_rate": 2.657587653697822e-06,
"loss": 1.0104336738586426,
"step": 1018
},
{
"epoch": 2.156448202959831,
"grad_norm": 6.648144245147705,
"learning_rate": 2.6497277795332855e-06,
"loss": 0.8407163619995117,
"step": 1020
},
{
"epoch": 2.160676532769556,
"grad_norm": 0.837396502494812,
"learning_rate": 2.6418691312369295e-06,
"loss": 0.7050214409828186,
"step": 1022
},
{
"epoch": 2.1649048625792813,
"grad_norm": 0.6134306192398071,
"learning_rate": 2.634011804883886e-06,
"loss": 1.0578330755233765,
"step": 1024
},
{
"epoch": 2.1691331923890065,
"grad_norm": 0.7147375345230103,
"learning_rate": 2.6261558965331272e-06,
"loss": 1.0594534873962402,
"step": 1026
},
{
"epoch": 2.1733615221987317,
"grad_norm": 0.6058641672134399,
"learning_rate": 2.6183015022262892e-06,
"loss": 1.0534790754318237,
"step": 1028
},
{
"epoch": 2.177589852008457,
"grad_norm": 0.6654782891273499,
"learning_rate": 2.610448717986496e-06,
"loss": 1.067839503288269,
"step": 1030
},
{
"epoch": 2.1818181818181817,
"grad_norm": 1.3797681331634521,
"learning_rate": 2.6025976398171927e-06,
"loss": 1.0668026208877563,
"step": 1032
},
{
"epoch": 2.186046511627907,
"grad_norm": 0.6694332361221313,
"learning_rate": 2.5947483637009622e-06,
"loss": 1.1499404907226562,
"step": 1034
},
{
"epoch": 2.190274841437632,
"grad_norm": 0.46979257464408875,
"learning_rate": 2.586900985598358e-06,
"loss": 0.8229663372039795,
"step": 1036
},
{
"epoch": 2.1945031712473573,
"grad_norm": 2.6087801456451416,
"learning_rate": 2.579055601446732e-06,
"loss": 0.4731891453266144,
"step": 1038
},
{
"epoch": 2.1987315010570825,
"grad_norm": 1.0109667778015137,
"learning_rate": 2.571212307159056e-06,
"loss": 1.0908327102661133,
"step": 1040
},
{
"epoch": 2.2029598308668077,
"grad_norm": 2.8863284587860107,
"learning_rate": 2.563371198622755e-06,
"loss": 0.36066552996635437,
"step": 1042
},
{
"epoch": 2.207188160676533,
"grad_norm": 1.16829514503479,
"learning_rate": 2.5555323716985304e-06,
"loss": 1.053403615951538,
"step": 1044
},
{
"epoch": 2.211416490486258,
"grad_norm": 0.7351399064064026,
"learning_rate": 2.54769592221919e-06,
"loss": 0.6402167677879333,
"step": 1046
},
{
"epoch": 2.2156448202959833,
"grad_norm": 3.0547754764556885,
"learning_rate": 2.539861945988478e-06,
"loss": 0.8964632749557495,
"step": 1048
},
{
"epoch": 2.219873150105708,
"grad_norm": 1.3434550762176514,
"learning_rate": 2.5320305387799014e-06,
"loss": 0.6596440076828003,
"step": 1050
},
{
"epoch": 2.2241014799154333,
"grad_norm": 0.6450607776641846,
"learning_rate": 2.524201796335558e-06,
"loss": 0.9056267142295837,
"step": 1052
},
{
"epoch": 2.2283298097251585,
"grad_norm": 0.6330105066299438,
"learning_rate": 2.5163758143649716e-06,
"loss": 1.0713391304016113,
"step": 1054
},
{
"epoch": 2.2325581395348837,
"grad_norm": 0.6766862273216248,
"learning_rate": 2.5085526885439145e-06,
"loss": 1.0640653371810913,
"step": 1056
},
{
"epoch": 2.236786469344609,
"grad_norm": 0.3488926291465759,
"learning_rate": 2.5007325145132427e-06,
"loss": 0.8341073393821716,
"step": 1058
},
{
"epoch": 2.241014799154334,
"grad_norm": 2.001237154006958,
"learning_rate": 2.4929153878777268e-06,
"loss": 0.9115666747093201,
"step": 1060
},
{
"epoch": 2.2452431289640593,
"grad_norm": 0.7693464756011963,
"learning_rate": 2.48510140420488e-06,
"loss": 1.0226731300354004,
"step": 1062
},
{
"epoch": 2.2494714587737845,
"grad_norm": 1.4121301174163818,
"learning_rate": 2.477290659023791e-06,
"loss": 1.0118439197540283,
"step": 1064
},
{
"epoch": 2.2536997885835097,
"grad_norm": 2.2806310653686523,
"learning_rate": 2.469483247823959e-06,
"loss": 0.632957398891449,
"step": 1066
},
{
"epoch": 2.2579281183932345,
"grad_norm": 0.8324834704399109,
"learning_rate": 2.461679266054122e-06,
"loss": 0.8787606954574585,
"step": 1068
},
{
"epoch": 2.2621564482029597,
"grad_norm": 1.5810331106185913,
"learning_rate": 2.453878809121093e-06,
"loss": 0.8886688351631165,
"step": 1070
},
{
"epoch": 2.266384778012685,
"grad_norm": 0.6590220332145691,
"learning_rate": 2.4460819723885903e-06,
"loss": 1.0459415912628174,
"step": 1072
},
{
"epoch": 2.27061310782241,
"grad_norm": 0.26749613881111145,
"learning_rate": 2.4382888511760773e-06,
"loss": 0.7614855170249939,
"step": 1074
},
{
"epoch": 2.2748414376321353,
"grad_norm": 1.3493986129760742,
"learning_rate": 2.4304995407575917e-06,
"loss": 0.900128185749054,
"step": 1076
},
{
"epoch": 2.2790697674418605,
"grad_norm": 8.0263090133667,
"learning_rate": 2.4227141363605804e-06,
"loss": 0.22701826691627502,
"step": 1078
},
{
"epoch": 2.2832980972515857,
"grad_norm": 0.5107969641685486,
"learning_rate": 2.4149327331647432e-06,
"loss": 0.16721072793006897,
"step": 1080
},
{
"epoch": 2.287526427061311,
"grad_norm": 0.9236059188842773,
"learning_rate": 2.4071554263008584e-06,
"loss": 0.5462712645530701,
"step": 1082
},
{
"epoch": 2.291754756871036,
"grad_norm": 1.4398772716522217,
"learning_rate": 2.3993823108496272e-06,
"loss": 0.43305540084838867,
"step": 1084
},
{
"epoch": 2.295983086680761,
"grad_norm": 0.5344212055206299,
"learning_rate": 2.391613481840509e-06,
"loss": 0.25760167837142944,
"step": 1086
},
{
"epoch": 2.300211416490486,
"grad_norm": 5.494821071624756,
"learning_rate": 2.38384903425056e-06,
"loss": 0.7133547067642212,
"step": 1088
},
{
"epoch": 2.3044397463002113,
"grad_norm": 0.9530798196792603,
"learning_rate": 2.376089063003272e-06,
"loss": 0.9048901200294495,
"step": 1090
},
{
"epoch": 2.3086680761099365,
"grad_norm": 0.7235156893730164,
"learning_rate": 2.3683336629674096e-06,
"loss": 0.6983910202980042,
"step": 1092
},
{
"epoch": 2.3128964059196617,
"grad_norm": 0.6613774299621582,
"learning_rate": 2.3605829289558545e-06,
"loss": 1.0634891986846924,
"step": 1094
},
{
"epoch": 2.317124735729387,
"grad_norm": 0.7909154891967773,
"learning_rate": 2.3528369557244453e-06,
"loss": 1.035917043685913,
"step": 1096
},
{
"epoch": 2.321353065539112,
"grad_norm": 0.8521804213523865,
"learning_rate": 2.3450958379708156e-06,
"loss": 1.009893774986267,
"step": 1098
},
{
"epoch": 2.3255813953488373,
"grad_norm": 2.444586753845215,
"learning_rate": 2.3373596703332383e-06,
"loss": 0.6026294827461243,
"step": 1100
},
{
"epoch": 2.3298097251585626,
"grad_norm": 0.8242626786231995,
"learning_rate": 2.3296285473894746e-06,
"loss": 0.7475822567939758,
"step": 1102
},
{
"epoch": 2.3340380549682873,
"grad_norm": 0.684226930141449,
"learning_rate": 2.321902563655606e-06,
"loss": 1.0707495212554932,
"step": 1104
},
{
"epoch": 2.3382663847780125,
"grad_norm": 0.8783945441246033,
"learning_rate": 2.314181813584887e-06,
"loss": 1.013008952140808,
"step": 1106
},
{
"epoch": 2.3424947145877377,
"grad_norm": 0.9921977519989014,
"learning_rate": 2.306466391566591e-06,
"loss": 0.9479020833969116,
"step": 1108
},
{
"epoch": 2.346723044397463,
"grad_norm": 0.7830618619918823,
"learning_rate": 2.2987563919248518e-06,
"loss": 1.1364282369613647,
"step": 1110
},
{
"epoch": 2.350951374207188,
"grad_norm": 0.26116877794265747,
"learning_rate": 2.2910519089175103e-06,
"loss": 0.6622422933578491,
"step": 1112
},
{
"epoch": 2.3551797040169133,
"grad_norm": 4.712930202484131,
"learning_rate": 2.283353036734969e-06,
"loss": 0.94716477394104,
"step": 1114
},
{
"epoch": 2.3594080338266386,
"grad_norm": 0.9706722497940063,
"learning_rate": 2.2756598694990334e-06,
"loss": 0.6431679725646973,
"step": 1116
},
{
"epoch": 2.3636363636363638,
"grad_norm": 1.9938366413116455,
"learning_rate": 2.267972501261762e-06,
"loss": 1.308355450630188,
"step": 1118
},
{
"epoch": 2.367864693446089,
"grad_norm": 0.7777484059333801,
"learning_rate": 2.2602910260043208e-06,
"loss": 1.0695171356201172,
"step": 1120
},
{
"epoch": 2.3720930232558137,
"grad_norm": 0.7761583924293518,
"learning_rate": 2.252615537635831e-06,
"loss": 0.9347115755081177,
"step": 1122
},
{
"epoch": 2.376321353065539,
"grad_norm": 0.7822389006614685,
"learning_rate": 2.244946129992223e-06,
"loss": 0.7232018113136292,
"step": 1124
},
{
"epoch": 2.380549682875264,
"grad_norm": 2.1133530139923096,
"learning_rate": 2.2372828968350834e-06,
"loss": 1.0389723777770996,
"step": 1126
},
{
"epoch": 2.3847780126849893,
"grad_norm": 1.3042513132095337,
"learning_rate": 2.229625931850519e-06,
"loss": 0.7246500849723816,
"step": 1128
},
{
"epoch": 2.3890063424947146,
"grad_norm": 0.8496916890144348,
"learning_rate": 2.221975328648002e-06,
"loss": 0.8411369323730469,
"step": 1130
},
{
"epoch": 2.3932346723044398,
"grad_norm": 1.2774096727371216,
"learning_rate": 2.2143311807592292e-06,
"loss": 0.7468405961990356,
"step": 1132
},
{
"epoch": 2.397463002114165,
"grad_norm": 0.6452171206474304,
"learning_rate": 2.206693581636982e-06,
"loss": 1.111289620399475,
"step": 1134
},
{
"epoch": 2.40169133192389,
"grad_norm": 5.754592418670654,
"learning_rate": 2.1990626246539753e-06,
"loss": 0.6915456056594849,
"step": 1136
},
{
"epoch": 2.4059196617336154,
"grad_norm": 1.6072407960891724,
"learning_rate": 2.1914384031017265e-06,
"loss": 0.8382232189178467,
"step": 1138
},
{
"epoch": 2.41014799154334,
"grad_norm": 0.4873308837413788,
"learning_rate": 2.1838210101894062e-06,
"loss": 1.0329222679138184,
"step": 1140
},
{
"epoch": 2.4143763213530653,
"grad_norm": 0.7448446154594421,
"learning_rate": 2.1762105390427026e-06,
"loss": 1.19656503200531,
"step": 1142
},
{
"epoch": 2.4186046511627906,
"grad_norm": 2.470224618911743,
"learning_rate": 2.168607082702684e-06,
"loss": 0.6114988923072815,
"step": 1144
},
{
"epoch": 2.4228329809725158,
"grad_norm": 4.100384712219238,
"learning_rate": 2.161010734124658e-06,
"loss": 0.7755101323127747,
"step": 1146
},
{
"epoch": 2.427061310782241,
"grad_norm": 0.8485273122787476,
"learning_rate": 2.153421586177038e-06,
"loss": 0.8298628926277161,
"step": 1148
},
{
"epoch": 2.431289640591966,
"grad_norm": 1.0596591234207153,
"learning_rate": 2.145839731640208e-06,
"loss": 0.5695077180862427,
"step": 1150
},
{
"epoch": 2.4355179704016914,
"grad_norm": 0.32878732681274414,
"learning_rate": 2.138265263205384e-06,
"loss": 0.6108872890472412,
"step": 1152
},
{
"epoch": 2.4397463002114166,
"grad_norm": 0.47924017906188965,
"learning_rate": 2.130698273473486e-06,
"loss": 0.575315535068512,
"step": 1154
},
{
"epoch": 2.443974630021142,
"grad_norm": 0.5258365273475647,
"learning_rate": 2.1231388549540045e-06,
"loss": 0.9532243609428406,
"step": 1156
},
{
"epoch": 2.448202959830867,
"grad_norm": 4.6877546310424805,
"learning_rate": 2.115587100063868e-06,
"loss": 0.5808656811714172,
"step": 1158
},
{
"epoch": 2.452431289640592,
"grad_norm": 0.8416226506233215,
"learning_rate": 2.108043101126312e-06,
"loss": 1.0306192636489868,
"step": 1160
},
{
"epoch": 2.456659619450317,
"grad_norm": 3.2165985107421875,
"learning_rate": 2.1005069503697566e-06,
"loss": 1.0111299753189087,
"step": 1162
},
{
"epoch": 2.460887949260042,
"grad_norm": 0.6864579916000366,
"learning_rate": 2.092978739926672e-06,
"loss": 0.8028541207313538,
"step": 1164
},
{
"epoch": 2.4651162790697674,
"grad_norm": 0.9489989280700684,
"learning_rate": 2.0854585618324548e-06,
"loss": 1.2172460556030273,
"step": 1166
},
{
"epoch": 2.4693446088794926,
"grad_norm": 1.215120553970337,
"learning_rate": 2.0779465080243037e-06,
"loss": 1.3246065378189087,
"step": 1168
},
{
"epoch": 2.473572938689218,
"grad_norm": 0.6394163370132446,
"learning_rate": 2.0704426703400944e-06,
"loss": 0.7735956311225891,
"step": 1170
},
{
"epoch": 2.477801268498943,
"grad_norm": 1.1398952007293701,
"learning_rate": 2.0629471405172585e-06,
"loss": 0.8254691362380981,
"step": 1172
},
{
"epoch": 2.482029598308668,
"grad_norm": 0.5559751987457275,
"learning_rate": 2.055460010191658e-06,
"loss": 0.7504424452781677,
"step": 1174
},
{
"epoch": 2.4862579281183934,
"grad_norm": 0.8105632066726685,
"learning_rate": 2.0479813708964693e-06,
"loss": 0.7769438028335571,
"step": 1176
},
{
"epoch": 2.4904862579281186,
"grad_norm": 1.449171781539917,
"learning_rate": 2.0405113140610634e-06,
"loss": 0.8921318650245667,
"step": 1178
},
{
"epoch": 2.4947145877378434,
"grad_norm": 1.4208768606185913,
"learning_rate": 2.033049931009885e-06,
"loss": 0.6842445135116577,
"step": 1180
},
{
"epoch": 2.4989429175475686,
"grad_norm": 0.4888696074485779,
"learning_rate": 2.0255973129613406e-06,
"loss": 0.567357063293457,
"step": 1182
},
{
"epoch": 2.503171247357294,
"grad_norm": 0.8814659118652344,
"learning_rate": 2.0181535510266796e-06,
"loss": 0.1589071899652481,
"step": 1184
},
{
"epoch": 2.507399577167019,
"grad_norm": 1.7633031606674194,
"learning_rate": 2.0107187362088816e-06,
"loss": 0.9725368618965149,
"step": 1186
},
{
"epoch": 2.511627906976744,
"grad_norm": 2.5048136711120605,
"learning_rate": 2.0032929594015456e-06,
"loss": 0.9178006649017334,
"step": 1188
},
{
"epoch": 2.5158562367864694,
"grad_norm": 1.5520225763320923,
"learning_rate": 1.9958763113877755e-06,
"loss": 0.7678893804550171,
"step": 1190
},
{
"epoch": 2.5200845665961946,
"grad_norm": 0.5215038061141968,
"learning_rate": 1.988468882839075e-06,
"loss": 1.001523733139038,
"step": 1192
},
{
"epoch": 2.52431289640592,
"grad_norm": 0.6024693846702576,
"learning_rate": 1.9810707643142325e-06,
"loss": 0.6263225674629211,
"step": 1194
},
{
"epoch": 2.528541226215645,
"grad_norm": 1.617968201637268,
"learning_rate": 1.9736820462582186e-06,
"loss": 1.0076720714569092,
"step": 1196
},
{
"epoch": 2.53276955602537,
"grad_norm": 0.7982508540153503,
"learning_rate": 1.9663028190010815e-06,
"loss": 1.0421154499053955,
"step": 1198
},
{
"epoch": 2.536997885835095,
"grad_norm": 1.1996971368789673,
"learning_rate": 1.9589331727568384e-06,
"loss": 0.7256770133972168,
"step": 1200
},
{
"epoch": 2.54122621564482,
"grad_norm": 0.744490921497345,
"learning_rate": 1.9515731976223746e-06,
"loss": 1.0210518836975098,
"step": 1202
},
{
"epoch": 2.5454545454545454,
"grad_norm": 1.66182541847229,
"learning_rate": 1.9442229835763454e-06,
"loss": 0.44427788257598877,
"step": 1204
},
{
"epoch": 2.5496828752642706,
"grad_norm": 0.6226742267608643,
"learning_rate": 1.936882620478069e-06,
"loss": 1.068085789680481,
"step": 1206
},
{
"epoch": 2.553911205073996,
"grad_norm": 1.4527463912963867,
"learning_rate": 1.9295521980664317e-06,
"loss": 1.060996174812317,
"step": 1208
},
{
"epoch": 2.558139534883721,
"grad_norm": 0.6856507062911987,
"learning_rate": 1.922231805958795e-06,
"loss": 1.039587140083313,
"step": 1210
},
{
"epoch": 2.5623678646934462,
"grad_norm": 1.3432971239089966,
"learning_rate": 1.914921533649894e-06,
"loss": 0.7191824316978455,
"step": 1212
},
{
"epoch": 2.5665961945031714,
"grad_norm": 0.7632008194923401,
"learning_rate": 1.9076214705107417e-06,
"loss": 1.0393006801605225,
"step": 1214
},
{
"epoch": 2.570824524312896,
"grad_norm": 1.0369495153427124,
"learning_rate": 1.9003317057875443e-06,
"loss": 0.6147840023040771,
"step": 1216
},
{
"epoch": 2.5750528541226214,
"grad_norm": 1.336530089378357,
"learning_rate": 1.8930523286006052e-06,
"loss": 0.6377484202384949,
"step": 1218
},
{
"epoch": 2.5792811839323466,
"grad_norm": 2.0891432762145996,
"learning_rate": 1.8857834279432336e-06,
"loss": 0.509937584400177,
"step": 1220
},
{
"epoch": 2.583509513742072,
"grad_norm": 3.55784010887146,
"learning_rate": 1.8785250926806613e-06,
"loss": 0.5913651585578918,
"step": 1222
},
{
"epoch": 2.587737843551797,
"grad_norm": 4.819112777709961,
"learning_rate": 1.8712774115489524e-06,
"loss": 0.8116767406463623,
"step": 1224
},
{
"epoch": 2.5919661733615222,
"grad_norm": 0.43531814217567444,
"learning_rate": 1.8640404731539218e-06,
"loss": 0.47326603531837463,
"step": 1226
},
{
"epoch": 2.5961945031712474,
"grad_norm": 0.8789650201797485,
"learning_rate": 1.8568143659700472e-06,
"loss": 0.7499734163284302,
"step": 1228
},
{
"epoch": 2.6004228329809727,
"grad_norm": 1.4755181074142456,
"learning_rate": 1.8495991783393924e-06,
"loss": 0.8303921222686768,
"step": 1230
},
{
"epoch": 2.604651162790698,
"grad_norm": 3.1309523582458496,
"learning_rate": 1.8423949984705257e-06,
"loss": 0.7273667454719543,
"step": 1232
},
{
"epoch": 2.6088794926004226,
"grad_norm": 1.6632975339889526,
"learning_rate": 1.8352019144374406e-06,
"loss": 0.8571827411651611,
"step": 1234
},
{
"epoch": 2.613107822410148,
"grad_norm": 0.7448071241378784,
"learning_rate": 1.8280200141784771e-06,
"loss": 0.8664517998695374,
"step": 1236
},
{
"epoch": 2.617336152219873,
"grad_norm": 0.8705071210861206,
"learning_rate": 1.8208493854952535e-06,
"loss": 0.9958084225654602,
"step": 1238
},
{
"epoch": 2.6215644820295982,
"grad_norm": 0.7441583275794983,
"learning_rate": 1.8136901160515869e-06,
"loss": 0.7479358315467834,
"step": 1240
},
{
"epoch": 2.6257928118393234,
"grad_norm": 0.6350056529045105,
"learning_rate": 1.8065422933724192e-06,
"loss": 0.8547337651252747,
"step": 1242
},
{
"epoch": 2.6300211416490487,
"grad_norm": 1.2663848400115967,
"learning_rate": 1.799406004842757e-06,
"loss": 1.0284228324890137,
"step": 1244
},
{
"epoch": 2.634249471458774,
"grad_norm": 2.9096410274505615,
"learning_rate": 1.7922813377065946e-06,
"loss": 0.6996232867240906,
"step": 1246
},
{
"epoch": 2.638477801268499,
"grad_norm": 2.602738857269287,
"learning_rate": 1.7851683790658492e-06,
"loss": 0.5642688274383545,
"step": 1248
},
{
"epoch": 2.6427061310782243,
"grad_norm": 0.6103304028511047,
"learning_rate": 1.7780672158792979e-06,
"loss": 1.0508077144622803,
"step": 1250
},
{
"epoch": 2.646934460887949,
"grad_norm": 3.92741322517395,
"learning_rate": 1.7709779349615152e-06,
"loss": 0.5398973822593689,
"step": 1252
},
{
"epoch": 2.6511627906976747,
"grad_norm": 3.5654373168945312,
"learning_rate": 1.763900622981805e-06,
"loss": 0.7100467681884766,
"step": 1254
},
{
"epoch": 2.6553911205073994,
"grad_norm": 0.8442944288253784,
"learning_rate": 1.7568353664631528e-06,
"loss": 1.0310944318771362,
"step": 1256
},
{
"epoch": 2.6596194503171247,
"grad_norm": 0.7679892778396606,
"learning_rate": 1.7497822517811576e-06,
"loss": 0.3732684850692749,
"step": 1258
},
{
"epoch": 2.66384778012685,
"grad_norm": 2.0411362648010254,
"learning_rate": 1.7427413651629787e-06,
"loss": 0.5446974635124207,
"step": 1260
},
{
"epoch": 2.668076109936575,
"grad_norm": 7.280208587646484,
"learning_rate": 1.735712792686285e-06,
"loss": 0.7429797649383545,
"step": 1262
},
{
"epoch": 2.6723044397463003,
"grad_norm": 1.769396185874939,
"learning_rate": 1.7286966202781983e-06,
"loss": 0.7472846508026123,
"step": 1264
},
{
"epoch": 2.6765327695560255,
"grad_norm": 7.442590236663818,
"learning_rate": 1.7216929337142447e-06,
"loss": 0.4331527650356293,
"step": 1266
},
{
"epoch": 2.6807610993657507,
"grad_norm": 2.3331828117370605,
"learning_rate": 1.714701818617307e-06,
"loss": 0.7867488861083984,
"step": 1268
},
{
"epoch": 2.6849894291754755,
"grad_norm": 0.9535698890686035,
"learning_rate": 1.7077233604565758e-06,
"loss": 1.0159664154052734,
"step": 1270
},
{
"epoch": 2.689217758985201,
"grad_norm": 1.7152396440505981,
"learning_rate": 1.7007576445465054e-06,
"loss": 0.8122742176055908,
"step": 1272
},
{
"epoch": 2.693446088794926,
"grad_norm": 0.8219133019447327,
"learning_rate": 1.6938047560457716e-06,
"loss": 0.49331924319267273,
"step": 1274
},
{
"epoch": 2.697674418604651,
"grad_norm": 1.0974379777908325,
"learning_rate": 1.6868647799562296e-06,
"loss": 0.5021317601203918,
"step": 1276
},
{
"epoch": 2.7019027484143763,
"grad_norm": 1.0276836156845093,
"learning_rate": 1.6799378011218753e-06,
"loss": 1.0597912073135376,
"step": 1278
},
{
"epoch": 2.7061310782241015,
"grad_norm": 1.305923581123352,
"learning_rate": 1.6730239042278078e-06,
"loss": 0.7645857334136963,
"step": 1280
},
{
"epoch": 2.7103594080338267,
"grad_norm": 0.5968372225761414,
"learning_rate": 1.666123173799195e-06,
"loss": 1.1030560731887817,
"step": 1282
},
{
"epoch": 2.714587737843552,
"grad_norm": 1.4355765581130981,
"learning_rate": 1.659235694200238e-06,
"loss": 0.8181160092353821,
"step": 1284
},
{
"epoch": 2.718816067653277,
"grad_norm": 0.18032093346118927,
"learning_rate": 1.6523615496331417e-06,
"loss": 1.1456607580184937,
"step": 1286
},
{
"epoch": 2.723044397463002,
"grad_norm": 1.4734760522842407,
"learning_rate": 1.6455008241370874e-06,
"loss": 0.5729717016220093,
"step": 1288
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.7612594962120056,
"learning_rate": 1.6386536015871976e-06,
"loss": 1.0644044876098633,
"step": 1290
},
{
"epoch": 2.7315010570824523,
"grad_norm": 0.8396774530410767,
"learning_rate": 1.6318199656935195e-06,
"loss": 1.0980335474014282,
"step": 1292
},
{
"epoch": 2.7357293868921775,
"grad_norm": 0.7830753922462463,
"learning_rate": 1.6250000000000007e-06,
"loss": 1.054038405418396,
"step": 1294
},
{
"epoch": 2.7399577167019027,
"grad_norm": 0.3286767899990082,
"learning_rate": 1.618193787883458e-06,
"loss": 0.3908301591873169,
"step": 1296
},
{
"epoch": 2.744186046511628,
"grad_norm": 0.6592679619789124,
"learning_rate": 1.611401412552569e-06,
"loss": 0.5546691417694092,
"step": 1298
},
{
"epoch": 2.748414376321353,
"grad_norm": 0.5871724486351013,
"learning_rate": 1.604622957046854e-06,
"loss": 0.6974177360534668,
"step": 1300
},
{
"epoch": 2.7526427061310783,
"grad_norm": 3.476322650909424,
"learning_rate": 1.5978585042356526e-06,
"loss": 0.9717587828636169,
"step": 1302
},
{
"epoch": 2.7568710359408035,
"grad_norm": 1.426347017288208,
"learning_rate": 1.5911081368171174e-06,
"loss": 0.696022093296051,
"step": 1304
},
{
"epoch": 2.7610993657505283,
"grad_norm": 1.42485511302948,
"learning_rate": 1.5843719373172043e-06,
"loss": 0.8914967775344849,
"step": 1306
},
{
"epoch": 2.765327695560254,
"grad_norm": 0.9887190461158752,
"learning_rate": 1.5776499880886583e-06,
"loss": 0.8718952536582947,
"step": 1308
},
{
"epoch": 2.7695560253699787,
"grad_norm": 0.5860939621925354,
"learning_rate": 1.5709423713100066e-06,
"loss": 1.0336132049560547,
"step": 1310
},
{
"epoch": 2.773784355179704,
"grad_norm": 0.6642679572105408,
"learning_rate": 1.5642491689845623e-06,
"loss": 0.9066874980926514,
"step": 1312
},
{
"epoch": 2.778012684989429,
"grad_norm": 0.6993511319160461,
"learning_rate": 1.5575704629394118e-06,
"loss": 0.5353021025657654,
"step": 1314
},
{
"epoch": 2.7822410147991543,
"grad_norm": 0.8484950065612793,
"learning_rate": 1.550906334824419e-06,
"loss": 0.979564905166626,
"step": 1316
},
{
"epoch": 2.7864693446088795,
"grad_norm": 0.3303600251674652,
"learning_rate": 1.5442568661112273e-06,
"loss": 0.6826730966567993,
"step": 1318
},
{
"epoch": 2.7906976744186047,
"grad_norm": 3.2293996810913086,
"learning_rate": 1.5376221380922645e-06,
"loss": 0.9952559471130371,
"step": 1320
},
{
"epoch": 2.79492600422833,
"grad_norm": 2.9947149753570557,
"learning_rate": 1.5310022318797468e-06,
"loss": 0.5234836339950562,
"step": 1322
},
{
"epoch": 2.7991543340380547,
"grad_norm": 0.8693253397941589,
"learning_rate": 1.5243972284046843e-06,
"loss": 1.0908644199371338,
"step": 1324
},
{
"epoch": 2.8033826638477803,
"grad_norm": 1.8999295234680176,
"learning_rate": 1.5178072084159006e-06,
"loss": 0.30439692735671997,
"step": 1326
},
{
"epoch": 2.807610993657505,
"grad_norm": 1.2835586071014404,
"learning_rate": 1.5112322524790373e-06,
"loss": 0.3868151009082794,
"step": 1328
},
{
"epoch": 2.8118393234672303,
"grad_norm": 0.6148664355278015,
"learning_rate": 1.5046724409755708e-06,
"loss": 0.655669093132019,
"step": 1330
},
{
"epoch": 2.8160676532769555,
"grad_norm": 0.9131718277931213,
"learning_rate": 1.4981278541018338e-06,
"loss": 1.027086615562439,
"step": 1332
},
{
"epoch": 2.8202959830866807,
"grad_norm": 0.9928625226020813,
"learning_rate": 1.4915985718680303e-06,
"loss": 0.6656888723373413,
"step": 1334
},
{
"epoch": 2.824524312896406,
"grad_norm": 0.759575366973877,
"learning_rate": 1.4850846740972566e-06,
"loss": 1.0963438749313354,
"step": 1336
},
{
"epoch": 2.828752642706131,
"grad_norm": 1.3372092247009277,
"learning_rate": 1.478586240424532e-06,
"loss": 1.0407531261444092,
"step": 1338
},
{
"epoch": 2.8329809725158563,
"grad_norm": 1.6881779432296753,
"learning_rate": 1.4721033502958188e-06,
"loss": 0.8279685974121094,
"step": 1340
},
{
"epoch": 2.8372093023255816,
"grad_norm": 0.5533450245857239,
"learning_rate": 1.4656360829670524e-06,
"loss": 1.0067516565322876,
"step": 1342
},
{
"epoch": 2.8414376321353068,
"grad_norm": 0.5380539298057556,
"learning_rate": 1.4591845175031755e-06,
"loss": 0.7166640162467957,
"step": 1344
},
{
"epoch": 2.8456659619450315,
"grad_norm": 1.2064310312271118,
"learning_rate": 1.4527487327771667e-06,
"loss": 0.6947576403617859,
"step": 1346
},
{
"epoch": 2.8498942917547567,
"grad_norm": 0.6393163800239563,
"learning_rate": 1.44632880746908e-06,
"loss": 0.9154616594314575,
"step": 1348
},
{
"epoch": 2.854122621564482,
"grad_norm": 2.2646851539611816,
"learning_rate": 1.4399248200650822e-06,
"loss": 0.5946722626686096,
"step": 1350
},
{
"epoch": 2.858350951374207,
"grad_norm": 6.771807670593262,
"learning_rate": 1.4335368488564921e-06,
"loss": 0.9889756441116333,
"step": 1352
},
{
"epoch": 2.8625792811839323,
"grad_norm": 0.6024413108825684,
"learning_rate": 1.4271649719388235e-06,
"loss": 1.0145889520645142,
"step": 1354
},
{
"epoch": 2.8668076109936576,
"grad_norm": 6.5942912101745605,
"learning_rate": 1.420809267210832e-06,
"loss": 0.4889359176158905,
"step": 1356
},
{
"epoch": 2.8710359408033828,
"grad_norm": 1.2514537572860718,
"learning_rate": 1.4144698123735614e-06,
"loss": 1.060815453529358,
"step": 1358
},
{
"epoch": 2.875264270613108,
"grad_norm": 3.7441012859344482,
"learning_rate": 1.408146684929394e-06,
"loss": 0.795141875743866,
"step": 1360
},
{
"epoch": 2.879492600422833,
"grad_norm": 1.1793290376663208,
"learning_rate": 1.401839962181103e-06,
"loss": 0.7162335515022278,
"step": 1362
},
{
"epoch": 2.883720930232558,
"grad_norm": 1.296712875366211,
"learning_rate": 1.3955497212309082e-06,
"loss": 1.0849847793579102,
"step": 1364
},
{
"epoch": 2.887949260042283,
"grad_norm": 3.475389003753662,
"learning_rate": 1.389276038979532e-06,
"loss": 0.875495970249176,
"step": 1366
},
{
"epoch": 2.8921775898520083,
"grad_norm": 0.5793375372886658,
"learning_rate": 1.3830189921252605e-06,
"loss": 1.020584225654602,
"step": 1368
},
{
"epoch": 2.8964059196617336,
"grad_norm": 0.7720953226089478,
"learning_rate": 1.3767786571630054e-06,
"loss": 1.035544753074646,
"step": 1370
},
{
"epoch": 2.9006342494714588,
"grad_norm": 0.6335976123809814,
"learning_rate": 1.3705551103833687e-06,
"loss": 1.0688656568527222,
"step": 1372
},
{
"epoch": 2.904862579281184,
"grad_norm": 0.5152540802955627,
"learning_rate": 1.364348427871709e-06,
"loss": 0.8726412057876587,
"step": 1374
},
{
"epoch": 2.909090909090909,
"grad_norm": 0.8800987601280212,
"learning_rate": 1.3581586855072162e-06,
"loss": 1.01813542842865,
"step": 1376
},
{
"epoch": 2.9133192389006344,
"grad_norm": 3.5038645267486572,
"learning_rate": 1.3519859589619756e-06,
"loss": 0.7246266603469849,
"step": 1378
},
{
"epoch": 2.9175475687103596,
"grad_norm": 0.6655816435813904,
"learning_rate": 1.3458303237000483e-06,
"loss": 0.7696600556373596,
"step": 1380
},
{
"epoch": 2.9217758985200843,
"grad_norm": 0.9207125902175903,
"learning_rate": 1.3396918549765514e-06,
"loss": 1.0463422536849976,
"step": 1382
},
{
"epoch": 2.92600422832981,
"grad_norm": 1.4580425024032593,
"learning_rate": 1.3335706278367289e-06,
"loss": 0.9288692474365234,
"step": 1384
},
{
"epoch": 2.9302325581395348,
"grad_norm": 0.6088332533836365,
"learning_rate": 1.3274667171150422e-06,
"loss": 0.7489819526672363,
"step": 1386
},
{
"epoch": 2.93446088794926,
"grad_norm": 1.341879963874817,
"learning_rate": 1.3213801974342516e-06,
"loss": 1.0183134078979492,
"step": 1388
},
{
"epoch": 2.938689217758985,
"grad_norm": 0.258132666349411,
"learning_rate": 1.3153111432045079e-06,
"loss": 0.8709487318992615,
"step": 1390
},
{
"epoch": 2.9429175475687104,
"grad_norm": 0.5611593127250671,
"learning_rate": 1.309259628622435e-06,
"loss": 1.007150411605835,
"step": 1392
},
{
"epoch": 2.9471458773784356,
"grad_norm": 0.7201411724090576,
"learning_rate": 1.3032257276702296e-06,
"loss": 0.32561811804771423,
"step": 1394
},
{
"epoch": 2.951374207188161,
"grad_norm": 0.5295475721359253,
"learning_rate": 1.2972095141147578e-06,
"loss": 0.529960572719574,
"step": 1396
},
{
"epoch": 2.955602536997886,
"grad_norm": 2.0177695751190186,
"learning_rate": 1.2912110615066447e-06,
"loss": 0.9622781276702881,
"step": 1398
},
{
"epoch": 2.9598308668076108,
"grad_norm": 0.5386593341827393,
"learning_rate": 1.2852304431793838e-06,
"loss": 1.2505404949188232,
"step": 1400
},
{
"epoch": 2.9640591966173364,
"grad_norm": 4.687948226928711,
"learning_rate": 1.2792677322484386e-06,
"loss": 0.8016545176506042,
"step": 1402
},
{
"epoch": 2.968287526427061,
"grad_norm": 1.594322681427002,
"learning_rate": 1.2733230016103436e-06,
"loss": 0.5189470052719116,
"step": 1404
},
{
"epoch": 2.9725158562367864,
"grad_norm": 0.9102961421012878,
"learning_rate": 1.26739632394182e-06,
"loss": 0.9059958457946777,
"step": 1406
},
{
"epoch": 2.9767441860465116,
"grad_norm": 0.8692654371261597,
"learning_rate": 1.2614877716988845e-06,
"loss": 0.8937259316444397,
"step": 1408
},
{
"epoch": 2.980972515856237,
"grad_norm": 2.1760952472686768,
"learning_rate": 1.255597417115961e-06,
"loss": 0.833085834980011,
"step": 1410
},
{
"epoch": 2.985200845665962,
"grad_norm": 1.076922059059143,
"learning_rate": 1.249725332205e-06,
"loss": 1.064079999923706,
"step": 1412
},
{
"epoch": 2.989429175475687,
"grad_norm": 0.4375395178794861,
"learning_rate": 1.2438715887546002e-06,
"loss": 0.8243948221206665,
"step": 1414
},
{
"epoch": 2.9936575052854124,
"grad_norm": 2.233292579650879,
"learning_rate": 1.2380362583291272e-06,
"loss": 0.8824648261070251,
"step": 1416
},
{
"epoch": 2.997885835095137,
"grad_norm": 0.4582400321960449,
"learning_rate": 1.2322194122678375e-06,
"loss": 0.5593487620353699,
"step": 1418
},
{
"epoch": 3.0021141649048624,
"grad_norm": 0.6782700419425964,
"learning_rate": 1.226421121684014e-06,
"loss": 1.03118097782135,
"step": 1420
},
{
"epoch": 3.0063424947145876,
"grad_norm": 1.0694071054458618,
"learning_rate": 1.2206414574640868e-06,
"loss": 0.6397127509117126,
"step": 1422
},
{
"epoch": 3.010570824524313,
"grad_norm": 1.2534350156784058,
"learning_rate": 1.2148804902667736e-06,
"loss": 1.1392219066619873,
"step": 1424
},
{
"epoch": 3.014799154334038,
"grad_norm": 1.9186782836914062,
"learning_rate": 1.2091382905222132e-06,
"loss": 0.520480215549469,
"step": 1426
},
{
"epoch": 3.019027484143763,
"grad_norm": 0.8564389944076538,
"learning_rate": 1.2034149284311041e-06,
"loss": 0.6777791976928711,
"step": 1428
},
{
"epoch": 3.0232558139534884,
"grad_norm": 0.7034538388252258,
"learning_rate": 1.197710473963847e-06,
"loss": 0.8563777804374695,
"step": 1430
},
{
"epoch": 3.0274841437632136,
"grad_norm": 0.7909392714500427,
"learning_rate": 1.1920249968596902e-06,
"loss": 1.0257045030593872,
"step": 1432
},
{
"epoch": 3.031712473572939,
"grad_norm": 0.47468486428260803,
"learning_rate": 1.1863585666258748e-06,
"loss": 0.9145489931106567,
"step": 1434
},
{
"epoch": 3.035940803382664,
"grad_norm": 1.9823009967803955,
"learning_rate": 1.1807112525367876e-06,
"loss": 0.6615996360778809,
"step": 1436
},
{
"epoch": 3.040169133192389,
"grad_norm": 0.8342152833938599,
"learning_rate": 1.1750831236331117e-06,
"loss": 0.2739180326461792,
"step": 1438
},
{
"epoch": 3.044397463002114,
"grad_norm": 1.0090795755386353,
"learning_rate": 1.1694742487209842e-06,
"loss": 1.0122308731079102,
"step": 1440
},
{
"epoch": 3.048625792811839,
"grad_norm": 0.5868484973907471,
"learning_rate": 1.1638846963711545e-06,
"loss": 0.7627484798431396,
"step": 1442
},
{
"epoch": 3.0528541226215644,
"grad_norm": 2.0811753273010254,
"learning_rate": 1.1583145349181456e-06,
"loss": 0.21038176119327545,
"step": 1444
},
{
"epoch": 3.0570824524312896,
"grad_norm": 2.1059317588806152,
"learning_rate": 1.152763832459419e-06,
"loss": 0.6727972030639648,
"step": 1446
},
{
"epoch": 3.061310782241015,
"grad_norm": 2.8190877437591553,
"learning_rate": 1.1472326568545424e-06,
"loss": 0.7937036156654358,
"step": 1448
},
{
"epoch": 3.06553911205074,
"grad_norm": 0.6504483819007874,
"learning_rate": 1.1417210757243603e-06,
"loss": 0.7131494879722595,
"step": 1450
},
{
"epoch": 3.0697674418604652,
"grad_norm": 2.133265972137451,
"learning_rate": 1.136229156450165e-06,
"loss": 0.7005563378334045,
"step": 1452
},
{
"epoch": 3.0739957716701904,
"grad_norm": 0.7323704957962036,
"learning_rate": 1.1307569661728775e-06,
"loss": 0.9205468893051147,
"step": 1454
},
{
"epoch": 3.0782241014799157,
"grad_norm": 0.2374901920557022,
"learning_rate": 1.1253045717922215e-06,
"loss": 0.3031374216079712,
"step": 1456
},
{
"epoch": 3.0824524312896404,
"grad_norm": 1.803215742111206,
"learning_rate": 1.119872039965909e-06,
"loss": 0.7160661220550537,
"step": 1458
},
{
"epoch": 3.0866807610993656,
"grad_norm": 1.3725308179855347,
"learning_rate": 1.1144594371088245e-06,
"loss": 1.020361065864563,
"step": 1460
},
{
"epoch": 3.090909090909091,
"grad_norm": 0.6326039433479309,
"learning_rate": 1.1090668293922122e-06,
"loss": 0.971651554107666,
"step": 1462
},
{
"epoch": 3.095137420718816,
"grad_norm": 0.6070262789726257,
"learning_rate": 1.103694282742868e-06,
"loss": 0.6768549680709839,
"step": 1464
},
{
"epoch": 3.0993657505285412,
"grad_norm": 0.5303124785423279,
"learning_rate": 1.098341862842333e-06,
"loss": 0.7792209982872009,
"step": 1466
},
{
"epoch": 3.1035940803382664,
"grad_norm": 0.7507800459861755,
"learning_rate": 1.0930096351260913e-06,
"loss": 0.9888483881950378,
"step": 1468
},
{
"epoch": 3.1078224101479917,
"grad_norm": 0.6695652008056641,
"learning_rate": 1.0876976647827677e-06,
"loss": 0.9820244312286377,
"step": 1470
},
{
"epoch": 3.112050739957717,
"grad_norm": 4.5699615478515625,
"learning_rate": 1.0824060167533365e-06,
"loss": 0.6230260133743286,
"step": 1472
},
{
"epoch": 3.116279069767442,
"grad_norm": 1.4406520128250122,
"learning_rate": 1.0771347557303184e-06,
"loss": 1.0396496057510376,
"step": 1474
},
{
"epoch": 3.120507399577167,
"grad_norm": 0.8460061550140381,
"learning_rate": 1.0718839461569972e-06,
"loss": 0.9403010606765747,
"step": 1476
},
{
"epoch": 3.124735729386892,
"grad_norm": 2.2458934783935547,
"learning_rate": 1.0666536522266314e-06,
"loss": 0.4271532893180847,
"step": 1478
},
{
"epoch": 3.1289640591966172,
"grad_norm": 0.7539458870887756,
"learning_rate": 1.0614439378816634e-06,
"loss": 0.9892304539680481,
"step": 1480
},
{
"epoch": 3.1331923890063424,
"grad_norm": 0.8904014825820923,
"learning_rate": 1.0562548668129449e-06,
"loss": 0.9543983340263367,
"step": 1482
},
{
"epoch": 3.1374207188160677,
"grad_norm": 0.8391467928886414,
"learning_rate": 1.0510865024589558e-06,
"loss": 0.33414945006370544,
"step": 1484
},
{
"epoch": 3.141649048625793,
"grad_norm": 2.2866015434265137,
"learning_rate": 1.045938908005025e-06,
"loss": 1.0479934215545654,
"step": 1486
},
{
"epoch": 3.145877378435518,
"grad_norm": 0.8269973397254944,
"learning_rate": 1.0408121463825627e-06,
"loss": 1.0214964151382446,
"step": 1488
},
{
"epoch": 3.1501057082452433,
"grad_norm": 2.0901854038238525,
"learning_rate": 1.0357062802682905e-06,
"loss": 0.7124687433242798,
"step": 1490
},
{
"epoch": 3.1543340380549685,
"grad_norm": 2.464489459991455,
"learning_rate": 1.0306213720834738e-06,
"loss": 0.7923527956008911,
"step": 1492
},
{
"epoch": 3.1585623678646932,
"grad_norm": 0.5960375666618347,
"learning_rate": 1.0255574839931555e-06,
"loss": 0.5037514567375183,
"step": 1494
},
{
"epoch": 3.1627906976744184,
"grad_norm": 0.2680164575576782,
"learning_rate": 1.0205146779054037e-06,
"loss": 0.8170030117034912,
"step": 1496
},
{
"epoch": 3.1670190274841437,
"grad_norm": 0.6705971360206604,
"learning_rate": 1.0154930154705493e-06,
"loss": 0.9746053814888,
"step": 1498
},
{
"epoch": 3.171247357293869,
"grad_norm": 1.046158790588379,
"learning_rate": 1.0104925580804307e-06,
"loss": 1.0264575481414795,
"step": 1500
},
{
"epoch": 3.175475687103594,
"grad_norm": 2.6368725299835205,
"learning_rate": 1.0055133668676505e-06,
"loss": 0.46951693296432495,
"step": 1502
},
{
"epoch": 3.1797040169133193,
"grad_norm": 0.954997181892395,
"learning_rate": 1.0005555027048216e-06,
"loss": 0.5769892930984497,
"step": 1504
},
{
"epoch": 3.1839323467230445,
"grad_norm": 0.8056331276893616,
"learning_rate": 9.956190262038252e-07,
"loss": 0.7956379055976868,
"step": 1506
},
{
"epoch": 3.1881606765327697,
"grad_norm": 1.2383183240890503,
"learning_rate": 9.90703997715068e-07,
"loss": 0.4002586901187897,
"step": 1508
},
{
"epoch": 3.192389006342495,
"grad_norm": 3.1095306873321533,
"learning_rate": 9.8581047732675e-07,
"loss": 0.3678751289844513,
"step": 1510
},
{
"epoch": 3.1966173361522197,
"grad_norm": 0.2970428764820099,
"learning_rate": 9.809385248641244e-07,
"loss": 0.10512058436870575,
"step": 1512
},
{
"epoch": 3.200845665961945,
"grad_norm": 2.6694912910461426,
"learning_rate": 9.760881998887647e-07,
"loss": 0.7792633771896362,
"step": 1514
},
{
"epoch": 3.20507399577167,
"grad_norm": 1.5287692546844482,
"learning_rate": 9.712595616978445e-07,
"loss": 1.0101102590560913,
"step": 1516
},
{
"epoch": 3.2093023255813953,
"grad_norm": 0.7071142792701721,
"learning_rate": 9.66452669323406e-07,
"loss": 0.6074497699737549,
"step": 1518
},
{
"epoch": 3.2135306553911205,
"grad_norm": 1.0035736560821533,
"learning_rate": 9.616675815316373e-07,
"loss": 0.8396947383880615,
"step": 1520
},
{
"epoch": 3.2177589852008457,
"grad_norm": 0.7858723998069763,
"learning_rate": 9.569043568221613e-07,
"loss": 0.9395447969436646,
"step": 1522
},
{
"epoch": 3.221987315010571,
"grad_norm": 4.942752361297607,
"learning_rate": 9.52163053427313e-07,
"loss": 1.0000540018081665,
"step": 1524
},
{
"epoch": 3.226215644820296,
"grad_norm": 0.7960143685340881,
"learning_rate": 9.474437293114311e-07,
"loss": 0.948387086391449,
"step": 1526
},
{
"epoch": 3.2304439746300213,
"grad_norm": 2.0574419498443604,
"learning_rate": 9.427464421701493e-07,
"loss": 0.2774934768676758,
"step": 1528
},
{
"epoch": 3.234672304439746,
"grad_norm": 1.1152596473693848,
"learning_rate": 9.380712494296898e-07,
"loss": 0.823591411113739,
"step": 1530
},
{
"epoch": 3.2389006342494713,
"grad_norm": 2.095369338989258,
"learning_rate": 9.334182082461624e-07,
"loss": 0.8626236319541931,
"step": 1532
},
{
"epoch": 3.2431289640591965,
"grad_norm": 0.8906185626983643,
"learning_rate": 9.287873755048647e-07,
"loss": 0.9925634264945984,
"step": 1534
},
{
"epoch": 3.2473572938689217,
"grad_norm": 0.876634955406189,
"learning_rate": 9.241788078195874e-07,
"loss": 0.8858959078788757,
"step": 1536
},
{
"epoch": 3.251585623678647,
"grad_norm": 0.8159791231155396,
"learning_rate": 9.195925615319221e-07,
"loss": 0.7304887175559998,
"step": 1538
},
{
"epoch": 3.255813953488372,
"grad_norm": 0.8356714248657227,
"learning_rate": 9.150286927105726e-07,
"loss": 0.6133416891098022,
"step": 1540
},
{
"epoch": 3.2600422832980973,
"grad_norm": 1.4572813510894775,
"learning_rate": 9.104872571506682e-07,
"loss": 1.211620807647705,
"step": 1542
},
{
"epoch": 3.2642706131078225,
"grad_norm": 0.5943049788475037,
"learning_rate": 9.059683103730835e-07,
"loss": 0.9767951369285583,
"step": 1544
},
{
"epoch": 3.2684989429175477,
"grad_norm": 1.6723552942276,
"learning_rate": 9.014719076237579e-07,
"loss": 0.9184189438819885,
"step": 1546
},
{
"epoch": 3.2727272727272725,
"grad_norm": 0.5673151016235352,
"learning_rate": 8.969981038730224e-07,
"loss": 0.3618415892124176,
"step": 1548
},
{
"epoch": 3.276955602536998,
"grad_norm": 1.0060195922851562,
"learning_rate": 8.925469538149245e-07,
"loss": 0.9330455660820007,
"step": 1550
},
{
"epoch": 3.281183932346723,
"grad_norm": 0.9557608366012573,
"learning_rate": 8.881185118665616e-07,
"loss": 1.0155820846557617,
"step": 1552
},
{
"epoch": 3.285412262156448,
"grad_norm": 0.13276489078998566,
"learning_rate": 8.837128321674174e-07,
"loss": 0.1570519506931305,
"step": 1554
},
{
"epoch": 3.2896405919661733,
"grad_norm": 0.714574933052063,
"learning_rate": 8.793299685786944e-07,
"loss": 0.942793607711792,
"step": 1556
},
{
"epoch": 3.2938689217758985,
"grad_norm": 0.9168136715888977,
"learning_rate": 8.749699746826612e-07,
"loss": 0.5292172431945801,
"step": 1558
},
{
"epoch": 3.2980972515856237,
"grad_norm": 1.3022035360336304,
"learning_rate": 8.706329037819961e-07,
"loss": 1.1990944147109985,
"step": 1560
},
{
"epoch": 3.302325581395349,
"grad_norm": 1.6504409313201904,
"learning_rate": 8.663188088991317e-07,
"loss": 0.7757396697998047,
"step": 1562
},
{
"epoch": 3.306553911205074,
"grad_norm": 1.3289718627929688,
"learning_rate": 8.620277427756112e-07,
"loss": 0.5169369578361511,
"step": 1564
},
{
"epoch": 3.3107822410147993,
"grad_norm": 0.875095546245575,
"learning_rate": 8.577597578714439e-07,
"loss": 0.7265094518661499,
"step": 1566
},
{
"epoch": 3.3150105708245245,
"grad_norm": 0.33962443470954895,
"learning_rate": 8.53514906364458e-07,
"loss": 0.12319551408290863,
"step": 1568
},
{
"epoch": 3.3192389006342493,
"grad_norm": 1.1597821712493896,
"learning_rate": 8.492932401496683e-07,
"loss": 0.5623422861099243,
"step": 1570
},
{
"epoch": 3.3234672304439745,
"grad_norm": 3.1550745964050293,
"learning_rate": 8.45094810838642e-07,
"loss": 0.7283601760864258,
"step": 1572
},
{
"epoch": 3.3276955602536997,
"grad_norm": 1.145011305809021,
"learning_rate": 8.40919669758864e-07,
"loss": 0.26868027448654175,
"step": 1574
},
{
"epoch": 3.331923890063425,
"grad_norm": 3.8039538860321045,
"learning_rate": 8.3676786795311e-07,
"loss": 0.4419690668582916,
"step": 1576
},
{
"epoch": 3.33615221987315,
"grad_norm": 0.6252729892730713,
"learning_rate": 8.326394561788257e-07,
"loss": 0.5640559196472168,
"step": 1578
},
{
"epoch": 3.3403805496828753,
"grad_norm": 0.55072021484375,
"learning_rate": 8.285344849075047e-07,
"loss": 0.6380379796028137,
"step": 1580
},
{
"epoch": 3.3446088794926006,
"grad_norm": 1.0008291006088257,
"learning_rate": 8.244530043240687e-07,
"loss": 0.98517906665802,
"step": 1582
},
{
"epoch": 3.3488372093023258,
"grad_norm": 1.933143138885498,
"learning_rate": 8.203950643262576e-07,
"loss": 0.717485785484314,
"step": 1584
},
{
"epoch": 3.353065539112051,
"grad_norm": 0.8978578448295593,
"learning_rate": 8.163607145240191e-07,
"loss": 0.6533565521240234,
"step": 1586
},
{
"epoch": 3.3572938689217757,
"grad_norm": 1.672323226928711,
"learning_rate": 8.123500042389003e-07,
"loss": 1.1361911296844482,
"step": 1588
},
{
"epoch": 3.361522198731501,
"grad_norm": 0.7658936381340027,
"learning_rate": 8.083629825034443e-07,
"loss": 0.6171827912330627,
"step": 1590
},
{
"epoch": 3.365750528541226,
"grad_norm": 1.5416232347488403,
"learning_rate": 8.043996980605952e-07,
"loss": 0.8929522633552551,
"step": 1592
},
{
"epoch": 3.3699788583509513,
"grad_norm": 0.6201046109199524,
"learning_rate": 8.004601993630979e-07,
"loss": 0.4101506471633911,
"step": 1594
},
{
"epoch": 3.3742071881606766,
"grad_norm": 0.8717901706695557,
"learning_rate": 7.965445345729045e-07,
"loss": 0.9818314909934998,
"step": 1596
},
{
"epoch": 3.3784355179704018,
"grad_norm": 2.7879254817962646,
"learning_rate": 7.926527515605922e-07,
"loss": 0.644636332988739,
"step": 1598
},
{
"epoch": 3.382663847780127,
"grad_norm": 1.3160312175750732,
"learning_rate": 7.88784897904772e-07,
"loss": 0.41142430901527405,
"step": 1600
},
{
"epoch": 3.386892177589852,
"grad_norm": 1.4033868312835693,
"learning_rate": 7.849410208915069e-07,
"loss": 0.5842673778533936,
"step": 1602
},
{
"epoch": 3.3911205073995774,
"grad_norm": 1.1946991682052612,
"learning_rate": 7.811211675137392e-07,
"loss": 1.0320261716842651,
"step": 1604
},
{
"epoch": 3.395348837209302,
"grad_norm": 0.639847993850708,
"learning_rate": 7.773253844707108e-07,
"loss": 1.0384889841079712,
"step": 1606
},
{
"epoch": 3.3995771670190273,
"grad_norm": 4.001772403717041,
"learning_rate": 7.735537181673947e-07,
"loss": 0.6584277749061584,
"step": 1608
},
{
"epoch": 3.4038054968287526,
"grad_norm": 0.6378755569458008,
"learning_rate": 7.69806214713926e-07,
"loss": 1.018156886100769,
"step": 1610
},
{
"epoch": 3.4080338266384778,
"grad_norm": 0.7776346802711487,
"learning_rate": 7.660829199250404e-07,
"loss": 0.8746322393417358,
"step": 1612
},
{
"epoch": 3.412262156448203,
"grad_norm": 1.3227170705795288,
"learning_rate": 7.623838793195128e-07,
"loss": 0.8452064990997314,
"step": 1614
},
{
"epoch": 3.416490486257928,
"grad_norm": 1.253333330154419,
"learning_rate": 7.587091381196004e-07,
"loss": 0.9873075485229492,
"step": 1616
},
{
"epoch": 3.4207188160676534,
"grad_norm": 0.5858563184738159,
"learning_rate": 7.550587412504907e-07,
"loss": 0.9376651644706726,
"step": 1618
},
{
"epoch": 3.4249471458773786,
"grad_norm": 1.6567012071609497,
"learning_rate": 7.514327333397521e-07,
"loss": 0.9783826470375061,
"step": 1620
},
{
"epoch": 3.429175475687104,
"grad_norm": 7.168039321899414,
"learning_rate": 7.47831158716788e-07,
"loss": 0.6209827661514282,
"step": 1622
},
{
"epoch": 3.4334038054968286,
"grad_norm": 0.9959341883659363,
"learning_rate": 7.442540614122954e-07,
"loss": 0.9962281584739685,
"step": 1624
},
{
"epoch": 3.4376321353065538,
"grad_norm": 0.6434539556503296,
"learning_rate": 7.407014851577257e-07,
"loss": 0.7141914367675781,
"step": 1626
},
{
"epoch": 3.441860465116279,
"grad_norm": 1.174318552017212,
"learning_rate": 7.371734733847509e-07,
"loss": 0.9825333952903748,
"step": 1628
},
{
"epoch": 3.446088794926004,
"grad_norm": 3.244459867477417,
"learning_rate": 7.336700692247326e-07,
"loss": 0.598316490650177,
"step": 1630
},
{
"epoch": 3.4503171247357294,
"grad_norm": 0.6823888421058655,
"learning_rate": 7.301913155081937e-07,
"loss": 0.9444507360458374,
"step": 1632
},
{
"epoch": 3.4545454545454546,
"grad_norm": 3.044529676437378,
"learning_rate": 7.267372547642965e-07,
"loss": 0.6880492568016052,
"step": 1634
},
{
"epoch": 3.45877378435518,
"grad_norm": 0.7098934650421143,
"learning_rate": 7.23307929220321e-07,
"loss": 0.8510515689849854,
"step": 1636
},
{
"epoch": 3.463002114164905,
"grad_norm": 2.740060806274414,
"learning_rate": 7.199033808011497e-07,
"loss": 0.4582882225513458,
"step": 1638
},
{
"epoch": 3.46723044397463,
"grad_norm": 0.6570109724998474,
"learning_rate": 7.16523651128755e-07,
"loss": 0.5597135424613953,
"step": 1640
},
{
"epoch": 3.471458773784355,
"grad_norm": 0.6645305156707764,
"learning_rate": 7.131687815216901e-07,
"loss": 0.22359013557434082,
"step": 1642
},
{
"epoch": 3.47568710359408,
"grad_norm": 0.8287932872772217,
"learning_rate": 7.098388129945833e-07,
"loss": 0.9671212434768677,
"step": 1644
},
{
"epoch": 3.4799154334038054,
"grad_norm": 1.3744875192642212,
"learning_rate": 7.065337862576381e-07,
"loss": 0.9185785055160522,
"step": 1646
},
{
"epoch": 3.4841437632135306,
"grad_norm": 0.5585587024688721,
"learning_rate": 7.032537417161339e-07,
"loss": 0.5719754695892334,
"step": 1648
},
{
"epoch": 3.488372093023256,
"grad_norm": 0.6185411810874939,
"learning_rate": 6.999987194699334e-07,
"loss": 0.5411649942398071,
"step": 1650
},
{
"epoch": 3.492600422832981,
"grad_norm": 1.760116457939148,
"learning_rate": 6.967687593129909e-07,
"loss": 0.6113811731338501,
"step": 1652
},
{
"epoch": 3.496828752642706,
"grad_norm": 1.6597703695297241,
"learning_rate": 6.935639007328666e-07,
"loss": 0.9229161143302917,
"step": 1654
},
{
"epoch": 3.5010570824524314,
"grad_norm": 0.8579858541488647,
"learning_rate": 6.903841829102457e-07,
"loss": 0.9809255003929138,
"step": 1656
},
{
"epoch": 3.5052854122621566,
"grad_norm": 0.428204208612442,
"learning_rate": 6.872296447184546e-07,
"loss": 0.843367338180542,
"step": 1658
},
{
"epoch": 3.5095137420718814,
"grad_norm": 0.5644919276237488,
"learning_rate": 6.841003247229903e-07,
"loss": 0.6564947962760925,
"step": 1660
},
{
"epoch": 3.513742071881607,
"grad_norm": 4.3857879638671875,
"learning_rate": 6.80996261181048e-07,
"loss": 0.6874603629112244,
"step": 1662
},
{
"epoch": 3.517970401691332,
"grad_norm": 2.124926805496216,
"learning_rate": 6.779174920410505e-07,
"loss": 0.9908625483512878,
"step": 1664
},
{
"epoch": 3.522198731501057,
"grad_norm": 1.0560848712921143,
"learning_rate": 6.748640549421873e-07,
"loss": 1.0359817743301392,
"step": 1666
},
{
"epoch": 3.526427061310782,
"grad_norm": 0.21484586596488953,
"learning_rate": 6.71835987213955e-07,
"loss": 0.2849699854850769,
"step": 1668
},
{
"epoch": 3.5306553911205074,
"grad_norm": 0.4259510040283203,
"learning_rate": 6.688333258756966e-07,
"loss": 0.8330371975898743,
"step": 1670
},
{
"epoch": 3.5348837209302326,
"grad_norm": 0.9718641042709351,
"learning_rate": 6.658561076361539e-07,
"loss": 0.6728772521018982,
"step": 1672
},
{
"epoch": 3.539112050739958,
"grad_norm": 1.3043420314788818,
"learning_rate": 6.629043688930161e-07,
"loss": 1.06952702999115,
"step": 1674
},
{
"epoch": 3.543340380549683,
"grad_norm": 2.601339340209961,
"learning_rate": 6.599781457324759e-07,
"loss": 0.7122786641120911,
"step": 1676
},
{
"epoch": 3.547568710359408,
"grad_norm": 0.5953323841094971,
"learning_rate": 6.570774739287855e-07,
"loss": 0.9681164026260376,
"step": 1678
},
{
"epoch": 3.5517970401691334,
"grad_norm": 0.6733061075210571,
"learning_rate": 6.542023889438244e-07,
"loss": 0.660723090171814,
"step": 1680
},
{
"epoch": 3.556025369978858,
"grad_norm": 0.5774943828582764,
"learning_rate": 6.513529259266614e-07,
"loss": 0.6790302991867065,
"step": 1682
},
{
"epoch": 3.5602536997885834,
"grad_norm": 1.2416257858276367,
"learning_rate": 6.485291197131258e-07,
"loss": 0.6007125377655029,
"step": 1684
},
{
"epoch": 3.5644820295983086,
"grad_norm": 1.9640257358551025,
"learning_rate": 6.45731004825384e-07,
"loss": 0.29832443594932556,
"step": 1686
},
{
"epoch": 3.568710359408034,
"grad_norm": 1.3899872303009033,
"learning_rate": 6.429586154715143e-07,
"loss": 0.7014768719673157,
"step": 1688
},
{
"epoch": 3.572938689217759,
"grad_norm": 0.20190729200839996,
"learning_rate": 6.402119855450905e-07,
"loss": 0.33684778213500977,
"step": 1690
},
{
"epoch": 3.5771670190274842,
"grad_norm": 1.3706597089767456,
"learning_rate": 6.374911486247666e-07,
"loss": 0.4806325137615204,
"step": 1692
},
{
"epoch": 3.5813953488372094,
"grad_norm": 1.9231373071670532,
"learning_rate": 6.347961379738678e-07,
"loss": 0.6597048044204712,
"step": 1694
},
{
"epoch": 3.585623678646934,
"grad_norm": 3.228268623352051,
"learning_rate": 6.321269865399811e-07,
"loss": 0.44895780086517334,
"step": 1696
},
{
"epoch": 3.58985200845666,
"grad_norm": 0.6945326328277588,
"learning_rate": 6.294837269545557e-07,
"loss": 0.9701504111289978,
"step": 1698
},
{
"epoch": 3.5940803382663846,
"grad_norm": 0.8373157978057861,
"learning_rate": 6.268663915325021e-07,
"loss": 1.074630856513977,
"step": 1700
},
{
"epoch": 3.59830866807611,
"grad_norm": 1.8083549737930298,
"learning_rate": 6.24275012271797e-07,
"loss": 1.0470349788665771,
"step": 1702
},
{
"epoch": 3.602536997885835,
"grad_norm": 3.190058469772339,
"learning_rate": 6.217096208530931e-07,
"loss": 0.4534735679626465,
"step": 1704
},
{
"epoch": 3.6067653276955602,
"grad_norm": 0.6707348823547363,
"learning_rate": 6.191702486393313e-07,
"loss": 0.5571319460868835,
"step": 1706
},
{
"epoch": 3.6109936575052854,
"grad_norm": 2.686514377593994,
"learning_rate": 6.166569266753569e-07,
"loss": 0.8430109620094299,
"step": 1708
},
{
"epoch": 3.6152219873150107,
"grad_norm": 1.0745434761047363,
"learning_rate": 6.141696856875408e-07,
"loss": 0.8707183599472046,
"step": 1710
},
{
"epoch": 3.619450317124736,
"grad_norm": 1.010704517364502,
"learning_rate": 6.117085560834034e-07,
"loss": 0.5877060890197754,
"step": 1712
},
{
"epoch": 3.6236786469344606,
"grad_norm": 0.396779328584671,
"learning_rate": 6.092735679512427e-07,
"loss": 0.49770990014076233,
"step": 1714
},
{
"epoch": 3.6279069767441863,
"grad_norm": 3.3722569942474365,
"learning_rate": 6.068647510597671e-07,
"loss": 0.7864755988121033,
"step": 1716
},
{
"epoch": 3.632135306553911,
"grad_norm": 0.5849418044090271,
"learning_rate": 6.044821348577306e-07,
"loss": 0.588261604309082,
"step": 1718
},
{
"epoch": 3.6363636363636362,
"grad_norm": 1.558585286140442,
"learning_rate": 6.021257484735737e-07,
"loss": 0.7706260681152344,
"step": 1720
},
{
"epoch": 3.6405919661733614,
"grad_norm": 0.7773210406303406,
"learning_rate": 5.997956207150664e-07,
"loss": 0.8451033234596252,
"step": 1722
},
{
"epoch": 3.6448202959830867,
"grad_norm": 1.4222577810287476,
"learning_rate": 5.974917800689572e-07,
"loss": 0.7600279450416565,
"step": 1724
},
{
"epoch": 3.649048625792812,
"grad_norm": 0.6138176918029785,
"learning_rate": 5.952142547006232e-07,
"loss": 1.0202842950820923,
"step": 1726
},
{
"epoch": 3.653276955602537,
"grad_norm": 0.6351314783096313,
"learning_rate": 5.92963072453727e-07,
"loss": 0.9493424296379089,
"step": 1728
},
{
"epoch": 3.6575052854122623,
"grad_norm": 1.68190598487854,
"learning_rate": 5.907382608498761e-07,
"loss": 0.8003555536270142,
"step": 1730
},
{
"epoch": 3.6617336152219875,
"grad_norm": 0.9876241683959961,
"learning_rate": 5.885398470882863e-07,
"loss": 0.9022297263145447,
"step": 1732
},
{
"epoch": 3.6659619450317127,
"grad_norm": 1.069425106048584,
"learning_rate": 5.863678580454489e-07,
"loss": 0.9579256772994995,
"step": 1734
},
{
"epoch": 3.6701902748414374,
"grad_norm": 0.5899412035942078,
"learning_rate": 5.842223202748026e-07,
"loss": 1.0141502618789673,
"step": 1736
},
{
"epoch": 3.6744186046511627,
"grad_norm": 2.7078421115875244,
"learning_rate": 5.821032600064089e-07,
"loss": 0.31864723563194275,
"step": 1738
},
{
"epoch": 3.678646934460888,
"grad_norm": 1.3227430582046509,
"learning_rate": 5.800107031466306e-07,
"loss": 0.52090984582901,
"step": 1740
},
{
"epoch": 3.682875264270613,
"grad_norm": 1.3572659492492676,
"learning_rate": 5.779446752778158e-07,
"loss": 0.40007254481315613,
"step": 1742
},
{
"epoch": 3.6871035940803383,
"grad_norm": 0.9358891248703003,
"learning_rate": 5.759052016579858e-07,
"loss": 0.9531795382499695,
"step": 1744
},
{
"epoch": 3.6913319238900635,
"grad_norm": 0.22946986556053162,
"learning_rate": 5.738923072205247e-07,
"loss": 0.6118672490119934,
"step": 1746
},
{
"epoch": 3.6955602536997887,
"grad_norm": 1.7882148027420044,
"learning_rate": 5.719060165738753e-07,
"loss": 0.5476849675178528,
"step": 1748
},
{
"epoch": 3.699788583509514,
"grad_norm": 0.6446103453636169,
"learning_rate": 5.699463540012398e-07,
"loss": 1.0358470678329468,
"step": 1750
},
{
"epoch": 3.704016913319239,
"grad_norm": 1.7342896461486816,
"learning_rate": 5.680133434602796e-07,
"loss": 0.43426331877708435,
"step": 1752
},
{
"epoch": 3.708245243128964,
"grad_norm": 3.078566789627075,
"learning_rate": 5.661070085828253e-07,
"loss": 0.5601077079772949,
"step": 1754
},
{
"epoch": 3.712473572938689,
"grad_norm": 0.696753978729248,
"learning_rate": 5.642273726745867e-07,
"loss": 0.8815577030181885,
"step": 1756
},
{
"epoch": 3.7167019027484143,
"grad_norm": 3.4322452545166016,
"learning_rate": 5.623744587148686e-07,
"loss": 0.55597984790802,
"step": 1758
},
{
"epoch": 3.7209302325581395,
"grad_norm": 0.21880486607551575,
"learning_rate": 5.605482893562872e-07,
"loss": 0.49099811911582947,
"step": 1760
},
{
"epoch": 3.7251585623678647,
"grad_norm": 0.806815505027771,
"learning_rate": 5.587488869244977e-07,
"loss": 0.9834616184234619,
"step": 1762
},
{
"epoch": 3.72938689217759,
"grad_norm": 0.905757486820221,
"learning_rate": 5.569762734179175e-07,
"loss": 0.5867785215377808,
"step": 1764
},
{
"epoch": 3.733615221987315,
"grad_norm": 1.050884485244751,
"learning_rate": 5.552304705074587e-07,
"loss": 0.8268157243728638,
"step": 1766
},
{
"epoch": 3.7378435517970403,
"grad_norm": 3.77276611328125,
"learning_rate": 5.535114995362631e-07,
"loss": 0.9136216044425964,
"step": 1768
},
{
"epoch": 3.7420718816067655,
"grad_norm": 0.35950765013694763,
"learning_rate": 5.518193815194421e-07,
"loss": 0.3232070505619049,
"step": 1770
},
{
"epoch": 3.7463002114164903,
"grad_norm": 1.1717166900634766,
"learning_rate": 5.50154137143818e-07,
"loss": 0.586397111415863,
"step": 1772
},
{
"epoch": 3.7505285412262155,
"grad_norm": 1.6452980041503906,
"learning_rate": 5.485157867676717e-07,
"loss": 1.2943792343139648,
"step": 1774
},
{
"epoch": 3.7547568710359407,
"grad_norm": 1.183484673500061,
"learning_rate": 5.469043504204954e-07,
"loss": 1.0138071775436401,
"step": 1776
},
{
"epoch": 3.758985200845666,
"grad_norm": 0.6358800530433655,
"learning_rate": 5.453198478027459e-07,
"loss": 1.0095187425613403,
"step": 1778
},
{
"epoch": 3.763213530655391,
"grad_norm": 1.2916791439056396,
"learning_rate": 5.437622982856039e-07,
"loss": 1.0655014514923096,
"step": 1780
},
{
"epoch": 3.7674418604651163,
"grad_norm": 1.0883994102478027,
"learning_rate": 5.422317209107381e-07,
"loss": 0.856255829334259,
"step": 1782
},
{
"epoch": 3.7716701902748415,
"grad_norm": 5.774519443511963,
"learning_rate": 5.407281343900724e-07,
"loss": 0.20018130540847778,
"step": 1784
},
{
"epoch": 3.7758985200845667,
"grad_norm": 1.4228441715240479,
"learning_rate": 5.392515571055551e-07,
"loss": 0.7519955039024353,
"step": 1786
},
{
"epoch": 3.780126849894292,
"grad_norm": 2.516164779663086,
"learning_rate": 5.378020071089375e-07,
"loss": 0.6696423292160034,
"step": 1788
},
{
"epoch": 3.7843551797040167,
"grad_norm": 1.3914198875427246,
"learning_rate": 5.363795021215504e-07,
"loss": 0.354766309261322,
"step": 1790
},
{
"epoch": 3.7885835095137423,
"grad_norm": 0.269010454416275,
"learning_rate": 5.349840595340888e-07,
"loss": 0.953768253326416,
"step": 1792
},
{
"epoch": 3.792811839323467,
"grad_norm": 0.7044401168823242,
"learning_rate": 5.33615696406399e-07,
"loss": 0.9254974722862244,
"step": 1794
},
{
"epoch": 3.7970401691331923,
"grad_norm": 2.0106935501098633,
"learning_rate": 5.322744294672698e-07,
"loss": 0.5682697296142578,
"step": 1796
},
{
"epoch": 3.8012684989429175,
"grad_norm": 2.6919407844543457,
"learning_rate": 5.309602751142287e-07,
"loss": 0.9588193297386169,
"step": 1798
},
{
"epoch": 3.8054968287526427,
"grad_norm": 1.6973198652267456,
"learning_rate": 5.296732494133406e-07,
"loss": 1.0144344568252563,
"step": 1800
},
{
"epoch": 3.809725158562368,
"grad_norm": 1.7578473091125488,
"learning_rate": 5.284133680990113e-07,
"loss": 0.7145028114318848,
"step": 1802
},
{
"epoch": 3.813953488372093,
"grad_norm": 0.8779058456420898,
"learning_rate": 5.271806465737967e-07,
"loss": 0.9277461767196655,
"step": 1804
},
{
"epoch": 3.8181818181818183,
"grad_norm": 0.7843415141105652,
"learning_rate": 5.259750999082123e-07,
"loss": 1.0387165546417236,
"step": 1806
},
{
"epoch": 3.822410147991543,
"grad_norm": 1.58511483669281,
"learning_rate": 5.247967428405505e-07,
"loss": 0.1425338089466095,
"step": 1808
},
{
"epoch": 3.8266384778012688,
"grad_norm": 1.0016520023345947,
"learning_rate": 5.236455897766998e-07,
"loss": 0.9441636204719543,
"step": 1810
},
{
"epoch": 3.8308668076109935,
"grad_norm": 0.822861909866333,
"learning_rate": 5.22521654789969e-07,
"loss": 1.029585838317871,
"step": 1812
},
{
"epoch": 3.8350951374207187,
"grad_norm": 0.6523001194000244,
"learning_rate": 5.214249516209148e-07,
"loss": 0.7822322249412537,
"step": 1814
},
{
"epoch": 3.839323467230444,
"grad_norm": 0.8254392743110657,
"learning_rate": 5.203554936771742e-07,
"loss": 0.6645534634590149,
"step": 1816
},
{
"epoch": 3.843551797040169,
"grad_norm": 1.8470152616500854,
"learning_rate": 5.193132940332998e-07,
"loss": 0.6678524613380432,
"step": 1818
},
{
"epoch": 3.8477801268498943,
"grad_norm": 0.7378912568092346,
"learning_rate": 5.182983654306015e-07,
"loss": 0.660444438457489,
"step": 1820
},
{
"epoch": 3.8520084566596196,
"grad_norm": 0.21690633893013,
"learning_rate": 5.173107202769891e-07,
"loss": 0.77535080909729,
"step": 1822
},
{
"epoch": 3.8562367864693448,
"grad_norm": 0.9497125148773193,
"learning_rate": 5.163503706468209e-07,
"loss": 0.6644335389137268,
"step": 1824
},
{
"epoch": 3.8604651162790695,
"grad_norm": 2.4505763053894043,
"learning_rate": 5.154173282807579e-07,
"loss": 0.6357966065406799,
"step": 1826
},
{
"epoch": 3.864693446088795,
"grad_norm": 0.5043659806251526,
"learning_rate": 5.145116045856168e-07,
"loss": 0.9884635210037231,
"step": 1828
},
{
"epoch": 3.86892177589852,
"grad_norm": 1.1076487302780151,
"learning_rate": 5.136332106342344e-07,
"loss": 1.014207124710083,
"step": 1830
},
{
"epoch": 3.873150105708245,
"grad_norm": 0.912322461605072,
"learning_rate": 5.127821571653295e-07,
"loss": 0.7557728886604309,
"step": 1832
},
{
"epoch": 3.8773784355179703,
"grad_norm": 4.824005126953125,
"learning_rate": 5.119584545833723e-07,
"loss": 0.5752384066581726,
"step": 1834
},
{
"epoch": 3.8816067653276956,
"grad_norm": 0.30750563740730286,
"learning_rate": 5.111621129584585e-07,
"loss": 0.6163195371627808,
"step": 1836
},
{
"epoch": 3.8858350951374208,
"grad_norm": 0.22983339428901672,
"learning_rate": 5.103931420261836e-07,
"loss": 0.5606608986854553,
"step": 1838
},
{
"epoch": 3.890063424947146,
"grad_norm": 1.0208609104156494,
"learning_rate": 5.096515511875267e-07,
"loss": 0.9524738788604736,
"step": 1840
},
{
"epoch": 3.894291754756871,
"grad_norm": 0.7148008942604065,
"learning_rate": 5.08937349508734e-07,
"loss": 0.9512585997581482,
"step": 1842
},
{
"epoch": 3.898520084566596,
"grad_norm": 0.737912654876709,
"learning_rate": 5.082505457212071e-07,
"loss": 0.6485314965248108,
"step": 1844
},
{
"epoch": 3.9027484143763216,
"grad_norm": 8.045441627502441,
"learning_rate": 5.07591148221399e-07,
"loss": 0.5995697379112244,
"step": 1846
},
{
"epoch": 3.9069767441860463,
"grad_norm": 1.6817905902862549,
"learning_rate": 5.069591650707088e-07,
"loss": 0.21968799829483032,
"step": 1848
},
{
"epoch": 3.9112050739957716,
"grad_norm": 0.6323149800300598,
"learning_rate": 5.063546039953841e-07,
"loss": 0.9831611514091492,
"step": 1850
},
{
"epoch": 3.9154334038054968,
"grad_norm": 0.21932940185070038,
"learning_rate": 5.057774723864276e-07,
"loss": 0.584568977355957,
"step": 1852
},
{
"epoch": 3.919661733615222,
"grad_norm": 0.534582793712616,
"learning_rate": 5.052277772995044e-07,
"loss": 0.9615625143051147,
"step": 1854
},
{
"epoch": 3.923890063424947,
"grad_norm": 1.219119668006897,
"learning_rate": 5.04705525454858e-07,
"loss": 0.5662239193916321,
"step": 1856
},
{
"epoch": 3.9281183932346724,
"grad_norm": 1.430123209953308,
"learning_rate": 5.042107232372275e-07,
"loss": 0.8200953006744385,
"step": 1858
},
{
"epoch": 3.9323467230443976,
"grad_norm": 1.7414133548736572,
"learning_rate": 5.037433766957684e-07,
"loss": 0.35313427448272705,
"step": 1860
},
{
"epoch": 3.9365750528541223,
"grad_norm": 2.733624219894409,
"learning_rate": 5.033034915439797e-07,
"loss": 1.0163811445236206,
"step": 1862
},
{
"epoch": 3.940803382663848,
"grad_norm": 1.1092981100082397,
"learning_rate": 5.028910731596344e-07,
"loss": 0.9771573543548584,
"step": 1864
},
{
"epoch": 3.9450317124735728,
"grad_norm": 1.123976707458496,
"learning_rate": 5.02506126584713e-07,
"loss": 0.9256449937820435,
"step": 1866
},
{
"epoch": 3.949260042283298,
"grad_norm": 0.1970531940460205,
"learning_rate": 5.021486565253419e-07,
"loss": 0.006525847129523754,
"step": 1868
},
{
"epoch": 3.953488372093023,
"grad_norm": 1.2699693441390991,
"learning_rate": 5.01818667351736e-07,
"loss": 0.9931344389915466,
"step": 1870
},
{
"epoch": 3.9577167019027484,
"grad_norm": 0.8406357765197754,
"learning_rate": 5.015161630981461e-07,
"loss": 0.7480917572975159,
"step": 1872
},
{
"epoch": 3.9619450317124736,
"grad_norm": 1.2351728677749634,
"learning_rate": 5.012411474628075e-07,
"loss": 0.6757962703704834,
"step": 1874
},
{
"epoch": 3.966173361522199,
"grad_norm": 0.8207041025161743,
"learning_rate": 5.009936238078976e-07,
"loss": 0.9615821838378906,
"step": 1876
},
{
"epoch": 3.970401691331924,
"grad_norm": 3.9564754962921143,
"learning_rate": 5.007735951594917e-07,
"loss": 0.47021615505218506,
"step": 1878
},
{
"epoch": 3.974630021141649,
"grad_norm": 0.9730548858642578,
"learning_rate": 5.005810642075292e-07,
"loss": 0.5955108404159546,
"step": 1880
},
{
"epoch": 3.9788583509513744,
"grad_norm": 1.1672887802124023,
"learning_rate": 5.00416033305778e-07,
"loss": 0.8675932884216309,
"step": 1882
},
{
"epoch": 3.983086680761099,
"grad_norm": 0.9139389395713806,
"learning_rate": 5.002785044718068e-07,
"loss": 1.0263160467147827,
"step": 1884
},
{
"epoch": 3.9873150105708244,
"grad_norm": 0.7081848978996277,
"learning_rate": 5.001684793869617e-07,
"loss": 0.7986045479774475,
"step": 1886
},
{
"epoch": 3.9915433403805496,
"grad_norm": 0.6204723119735718,
"learning_rate": 5.000859593963427e-07,
"loss": 0.962172269821167,
"step": 1888
},
{
"epoch": 3.995771670190275,
"grad_norm": 0.7160171270370483,
"learning_rate": 5.000309455087906e-07,
"loss": 0.9778663516044617,
"step": 1890
},
{
"epoch": 4.0,
"grad_norm": 0.8123812079429626,
"learning_rate": 5.000034383968715e-07,
"loss": 0.5614367723464966,
"step": 1892
},
{
"epoch": 4.0,
"step": 1892,
"total_flos": 3.554237146892075e+18,
"train_loss": 0.9275046758280858,
"train_runtime": 19158.8521,
"train_samples_per_second": 2.963,
"train_steps_per_second": 0.099
}
],
"logging_steps": 2,
"max_steps": 1892,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.554237146892075e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}