9b-87 / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
2e23d14 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 1804,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004434589800443459,
"grad_norm": 3.654876470565796,
"learning_rate": 5.494505494505495e-08,
"loss": 1.8642040491104126,
"step": 2
},
{
"epoch": 0.008869179600886918,
"grad_norm": 8.487046241760254,
"learning_rate": 1.6483516483516484e-07,
"loss": 2.128927230834961,
"step": 4
},
{
"epoch": 0.013303769401330377,
"grad_norm": 5.926363945007324,
"learning_rate": 2.7472527472527475e-07,
"loss": 1.9033676385879517,
"step": 6
},
{
"epoch": 0.017738359201773836,
"grad_norm": 1.700243353843689,
"learning_rate": 3.846153846153847e-07,
"loss": 1.8230125904083252,
"step": 8
},
{
"epoch": 0.022172949002217297,
"grad_norm": 4.129543304443359,
"learning_rate": 4.945054945054946e-07,
"loss": 1.6371376514434814,
"step": 10
},
{
"epoch": 0.026607538802660754,
"grad_norm": 2.8941972255706787,
"learning_rate": 6.043956043956044e-07,
"loss": 2.1972343921661377,
"step": 12
},
{
"epoch": 0.031042128603104215,
"grad_norm": 6.371809005737305,
"learning_rate": 7.142857142857143e-07,
"loss": 1.9573174715042114,
"step": 14
},
{
"epoch": 0.03547671840354767,
"grad_norm": 7.418441295623779,
"learning_rate": 8.241758241758242e-07,
"loss": 1.5130360126495361,
"step": 16
},
{
"epoch": 0.03991130820399113,
"grad_norm": 1.8268234729766846,
"learning_rate": 9.340659340659342e-07,
"loss": 1.441575527191162,
"step": 18
},
{
"epoch": 0.04434589800443459,
"grad_norm": 8.918134689331055,
"learning_rate": 1.0439560439560442e-06,
"loss": 1.6151642799377441,
"step": 20
},
{
"epoch": 0.04878048780487805,
"grad_norm": 7.498224258422852,
"learning_rate": 1.153846153846154e-06,
"loss": 1.4749759435653687,
"step": 22
},
{
"epoch": 0.05321507760532151,
"grad_norm": 1.2202322483062744,
"learning_rate": 1.2637362637362637e-06,
"loss": 1.5966670513153076,
"step": 24
},
{
"epoch": 0.057649667405764965,
"grad_norm": 1.1278345584869385,
"learning_rate": 1.3736263736263738e-06,
"loss": 1.5051370859146118,
"step": 26
},
{
"epoch": 0.06208425720620843,
"grad_norm": 1.166769027709961,
"learning_rate": 1.4835164835164837e-06,
"loss": 1.1861467361450195,
"step": 28
},
{
"epoch": 0.06651884700665188,
"grad_norm": 1.2690566778182983,
"learning_rate": 1.5934065934065933e-06,
"loss": 1.3006117343902588,
"step": 30
},
{
"epoch": 0.07095343680709534,
"grad_norm": 2.7064690589904785,
"learning_rate": 1.7032967032967034e-06,
"loss": 1.2063751220703125,
"step": 32
},
{
"epoch": 0.07538802660753881,
"grad_norm": 0.8439352512359619,
"learning_rate": 1.8131868131868133e-06,
"loss": 1.574849247932434,
"step": 34
},
{
"epoch": 0.07982261640798226,
"grad_norm": 3.7528631687164307,
"learning_rate": 1.9230769230769234e-06,
"loss": 1.334853172302246,
"step": 36
},
{
"epoch": 0.08425720620842572,
"grad_norm": 2.2949788570404053,
"learning_rate": 2.032967032967033e-06,
"loss": 1.5959619283676147,
"step": 38
},
{
"epoch": 0.08869179600886919,
"grad_norm": 2.2293429374694824,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.8560835719108582,
"step": 40
},
{
"epoch": 0.09312638580931264,
"grad_norm": 1.0512415170669556,
"learning_rate": 2.252747252747253e-06,
"loss": 1.4596399068832397,
"step": 42
},
{
"epoch": 0.0975609756097561,
"grad_norm": 2.0517866611480713,
"learning_rate": 2.362637362637363e-06,
"loss": 1.403381109237671,
"step": 44
},
{
"epoch": 0.10199556541019955,
"grad_norm": 1.8148796558380127,
"learning_rate": 2.472527472527473e-06,
"loss": 1.419820785522461,
"step": 46
},
{
"epoch": 0.10643015521064302,
"grad_norm": 2.2042646408081055,
"learning_rate": 2.582417582417583e-06,
"loss": 1.6194192171096802,
"step": 48
},
{
"epoch": 0.11086474501108648,
"grad_norm": 1.0148158073425293,
"learning_rate": 2.6923076923076923e-06,
"loss": 1.4980335235595703,
"step": 50
},
{
"epoch": 0.11529933481152993,
"grad_norm": 2.0108230113983154,
"learning_rate": 2.8021978021978024e-06,
"loss": 1.4199347496032715,
"step": 52
},
{
"epoch": 0.1197339246119734,
"grad_norm": 1.539353847503662,
"learning_rate": 2.9120879120879125e-06,
"loss": 1.4115248918533325,
"step": 54
},
{
"epoch": 0.12416851441241686,
"grad_norm": 15.816386222839355,
"learning_rate": 3.021978021978022e-06,
"loss": 1.1549075841903687,
"step": 56
},
{
"epoch": 0.1286031042128603,
"grad_norm": 1.0177377462387085,
"learning_rate": 3.1318681318681323e-06,
"loss": 1.3620848655700684,
"step": 58
},
{
"epoch": 0.13303769401330376,
"grad_norm": 0.6448049545288086,
"learning_rate": 3.2417582417582424e-06,
"loss": 1.4034713506698608,
"step": 60
},
{
"epoch": 0.13747228381374724,
"grad_norm": 1.29705810546875,
"learning_rate": 3.3516483516483516e-06,
"loss": 1.367940068244934,
"step": 62
},
{
"epoch": 0.1419068736141907,
"grad_norm": 2.276944637298584,
"learning_rate": 3.4615384615384617e-06,
"loss": 1.4364449977874756,
"step": 64
},
{
"epoch": 0.14634146341463414,
"grad_norm": 1.4872463941574097,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.4937427043914795,
"step": 66
},
{
"epoch": 0.15077605321507762,
"grad_norm": 1.1286942958831787,
"learning_rate": 3.681318681318682e-06,
"loss": 1.3563040494918823,
"step": 68
},
{
"epoch": 0.15521064301552107,
"grad_norm": 1.185439109802246,
"learning_rate": 3.7912087912087915e-06,
"loss": 1.3079450130462646,
"step": 70
},
{
"epoch": 0.15964523281596452,
"grad_norm": 1.4064218997955322,
"learning_rate": 3.901098901098901e-06,
"loss": 1.3620179891586304,
"step": 72
},
{
"epoch": 0.164079822616408,
"grad_norm": 1.5207171440124512,
"learning_rate": 4.010989010989012e-06,
"loss": 1.2632296085357666,
"step": 74
},
{
"epoch": 0.16851441241685144,
"grad_norm": 1.1961833238601685,
"learning_rate": 4.120879120879121e-06,
"loss": 1.3436288833618164,
"step": 76
},
{
"epoch": 0.1729490022172949,
"grad_norm": 1.7947943210601807,
"learning_rate": 4.230769230769231e-06,
"loss": 1.3881783485412598,
"step": 78
},
{
"epoch": 0.17738359201773837,
"grad_norm": 1.8594613075256348,
"learning_rate": 4.340659340659341e-06,
"loss": 1.3418405055999756,
"step": 80
},
{
"epoch": 0.18181818181818182,
"grad_norm": 2.344498872756958,
"learning_rate": 4.45054945054945e-06,
"loss": 0.8364058136940002,
"step": 82
},
{
"epoch": 0.18625277161862527,
"grad_norm": 1.0296517610549927,
"learning_rate": 4.560439560439561e-06,
"loss": 1.0285269021987915,
"step": 84
},
{
"epoch": 0.19068736141906872,
"grad_norm": 2.831413507461548,
"learning_rate": 4.6703296703296706e-06,
"loss": 0.9984404444694519,
"step": 86
},
{
"epoch": 0.1951219512195122,
"grad_norm": 1.0203207731246948,
"learning_rate": 4.780219780219781e-06,
"loss": 1.2625550031661987,
"step": 88
},
{
"epoch": 0.19955654101995565,
"grad_norm": 2.5028977394104004,
"learning_rate": 4.890109890109891e-06,
"loss": 1.3904521465301514,
"step": 90
},
{
"epoch": 0.2039911308203991,
"grad_norm": 1.2700505256652832,
"learning_rate": 5e-06,
"loss": 1.2553861141204834,
"step": 92
},
{
"epoch": 0.20842572062084258,
"grad_norm": 0.8640641570091248,
"learning_rate": 4.999984864490455e-06,
"loss": 1.2549915313720703,
"step": 94
},
{
"epoch": 0.21286031042128603,
"grad_norm": 2.820838212966919,
"learning_rate": 4.999939458165447e-06,
"loss": 1.7880680561065674,
"step": 96
},
{
"epoch": 0.21729490022172948,
"grad_norm": 1.3258427381515503,
"learning_rate": 4.999863781635863e-06,
"loss": 0.9339628219604492,
"step": 98
},
{
"epoch": 0.22172949002217296,
"grad_norm": 0.881152868270874,
"learning_rate": 4.999757835919841e-06,
"loss": 1.150363564491272,
"step": 100
},
{
"epoch": 0.2261640798226164,
"grad_norm": 0.9821615219116211,
"learning_rate": 4.9996216224427495e-06,
"loss": 1.3016384840011597,
"step": 102
},
{
"epoch": 0.23059866962305986,
"grad_norm": 2.3708696365356445,
"learning_rate": 4.999455143037178e-06,
"loss": 1.038110375404358,
"step": 104
},
{
"epoch": 0.23503325942350334,
"grad_norm": 1.0569431781768799,
"learning_rate": 4.999258399942903e-06,
"loss": 1.2825589179992676,
"step": 106
},
{
"epoch": 0.2394678492239468,
"grad_norm": 1.5671284198760986,
"learning_rate": 4.9990313958068645e-06,
"loss": 1.477828860282898,
"step": 108
},
{
"epoch": 0.24390243902439024,
"grad_norm": 1.4664688110351562,
"learning_rate": 4.998774133683127e-06,
"loss": 1.0344974994659424,
"step": 110
},
{
"epoch": 0.24833702882483372,
"grad_norm": 2.032390594482422,
"learning_rate": 4.9984866170328426e-06,
"loss": 1.2917076349258423,
"step": 112
},
{
"epoch": 0.25277161862527714,
"grad_norm": 1.4232149124145508,
"learning_rate": 4.998168849724196e-06,
"loss": 0.7877748012542725,
"step": 114
},
{
"epoch": 0.2572062084257206,
"grad_norm": 1.1114603281021118,
"learning_rate": 4.997820836032363e-06,
"loss": 1.3067116737365723,
"step": 116
},
{
"epoch": 0.2616407982261641,
"grad_norm": 0.5859808325767517,
"learning_rate": 4.997442580639443e-06,
"loss": 1.0599734783172607,
"step": 118
},
{
"epoch": 0.2660753880266075,
"grad_norm": 1.1004986763000488,
"learning_rate": 4.997034088634404e-06,
"loss": 1.253132700920105,
"step": 120
},
{
"epoch": 0.270509977827051,
"grad_norm": 1.7587755918502808,
"learning_rate": 4.996595365513012e-06,
"loss": 1.1558163166046143,
"step": 122
},
{
"epoch": 0.2749445676274945,
"grad_norm": 1.6989275217056274,
"learning_rate": 4.9961264171777515e-06,
"loss": 1.5596798658370972,
"step": 124
},
{
"epoch": 0.2793791574279379,
"grad_norm": 1.1851608753204346,
"learning_rate": 4.995627249937755e-06,
"loss": 0.9473103284835815,
"step": 126
},
{
"epoch": 0.2838137472283814,
"grad_norm": 1.4158447980880737,
"learning_rate": 4.995097870508711e-06,
"loss": 1.1967135667800903,
"step": 128
},
{
"epoch": 0.28824833702882485,
"grad_norm": 1.3109447956085205,
"learning_rate": 4.994538286012777e-06,
"loss": 0.9262881875038147,
"step": 130
},
{
"epoch": 0.2926829268292683,
"grad_norm": 2.214419364929199,
"learning_rate": 4.993948503978484e-06,
"loss": 0.9783093333244324,
"step": 132
},
{
"epoch": 0.29711751662971175,
"grad_norm": 3.2269115447998047,
"learning_rate": 4.993328532340633e-06,
"loss": 0.9716333150863647,
"step": 134
},
{
"epoch": 0.30155210643015523,
"grad_norm": 11.220888137817383,
"learning_rate": 4.99267837944019e-06,
"loss": 1.1295907497406006,
"step": 136
},
{
"epoch": 0.30598669623059865,
"grad_norm": 12.861579895019531,
"learning_rate": 4.991998054024172e-06,
"loss": 0.8071277141571045,
"step": 138
},
{
"epoch": 0.31042128603104213,
"grad_norm": 2.3948023319244385,
"learning_rate": 4.991287565245534e-06,
"loss": 1.0442757606506348,
"step": 140
},
{
"epoch": 0.3148558758314856,
"grad_norm": 8.661370277404785,
"learning_rate": 4.990546922663039e-06,
"loss": 0.9572257995605469,
"step": 142
},
{
"epoch": 0.31929046563192903,
"grad_norm": 3.8054051399230957,
"learning_rate": 4.989776136241134e-06,
"loss": 0.9722259044647217,
"step": 144
},
{
"epoch": 0.3237250554323725,
"grad_norm": 1.0706456899642944,
"learning_rate": 4.988975216349814e-06,
"loss": 0.9358381032943726,
"step": 146
},
{
"epoch": 0.328159645232816,
"grad_norm": 1.808540940284729,
"learning_rate": 4.988144173764486e-06,
"loss": 1.3917360305786133,
"step": 148
},
{
"epoch": 0.3325942350332594,
"grad_norm": 2.059934139251709,
"learning_rate": 4.987283019665817e-06,
"loss": 1.1396145820617676,
"step": 150
},
{
"epoch": 0.3370288248337029,
"grad_norm": 2.9052047729492188,
"learning_rate": 4.986391765639592e-06,
"loss": 1.0867761373519897,
"step": 152
},
{
"epoch": 0.34146341463414637,
"grad_norm": 1.053331732749939,
"learning_rate": 4.985470423676551e-06,
"loss": 1.2601056098937988,
"step": 154
},
{
"epoch": 0.3458980044345898,
"grad_norm": 2.696256399154663,
"learning_rate": 4.984519006172232e-06,
"loss": 1.2030073404312134,
"step": 156
},
{
"epoch": 0.35033259423503327,
"grad_norm": 0.9389686584472656,
"learning_rate": 4.983537525926804e-06,
"loss": 1.2913192510604858,
"step": 158
},
{
"epoch": 0.35476718403547675,
"grad_norm": 1.6440863609313965,
"learning_rate": 4.982525996144891e-06,
"loss": 1.0989590883255005,
"step": 160
},
{
"epoch": 0.35920177383592017,
"grad_norm": 0.938507616519928,
"learning_rate": 4.981484430435399e-06,
"loss": 0.8731570839881897,
"step": 162
},
{
"epoch": 0.36363636363636365,
"grad_norm": 3.254096508026123,
"learning_rate": 4.98041284281133e-06,
"loss": 0.8694003224372864,
"step": 164
},
{
"epoch": 0.36807095343680707,
"grad_norm": 0.8564817309379578,
"learning_rate": 4.979311247689596e-06,
"loss": 1.2683701515197754,
"step": 166
},
{
"epoch": 0.37250554323725055,
"grad_norm": 1.8692625761032104,
"learning_rate": 4.978179659890821e-06,
"loss": 1.1822130680084229,
"step": 168
},
{
"epoch": 0.376940133037694,
"grad_norm": 1.4104257822036743,
"learning_rate": 4.977018094639146e-06,
"loss": 1.240437626838684,
"step": 170
},
{
"epoch": 0.38137472283813745,
"grad_norm": 2.1901957988739014,
"learning_rate": 4.975826567562023e-06,
"loss": 0.7567204236984253,
"step": 172
},
{
"epoch": 0.3858093126385809,
"grad_norm": 2.066593647003174,
"learning_rate": 4.97460509469e-06,
"loss": 1.6173328161239624,
"step": 174
},
{
"epoch": 0.3902439024390244,
"grad_norm": 1.6422075033187866,
"learning_rate": 4.973353692456513e-06,
"loss": 1.2616726160049438,
"step": 176
},
{
"epoch": 0.3946784922394678,
"grad_norm": 2.5441336631774902,
"learning_rate": 4.972072377697661e-06,
"loss": 1.3162559270858765,
"step": 178
},
{
"epoch": 0.3991130820399113,
"grad_norm": 0.5965206027030945,
"learning_rate": 4.9707611676519775e-06,
"loss": 1.0434683561325073,
"step": 180
},
{
"epoch": 0.4035476718403548,
"grad_norm": 1.4375978708267212,
"learning_rate": 4.969420079960203e-06,
"loss": 1.2631562948226929,
"step": 182
},
{
"epoch": 0.4079822616407982,
"grad_norm": 1.5910781621932983,
"learning_rate": 4.968049132665045e-06,
"loss": 0.9169143438339233,
"step": 184
},
{
"epoch": 0.4124168514412417,
"grad_norm": 1.1630430221557617,
"learning_rate": 4.966648344210936e-06,
"loss": 0.9765180945396423,
"step": 186
},
{
"epoch": 0.41685144124168516,
"grad_norm": 0.955808699131012,
"learning_rate": 4.965217733443782e-06,
"loss": 0.9666871428489685,
"step": 188
},
{
"epoch": 0.4212860310421286,
"grad_norm": 8.470755577087402,
"learning_rate": 4.963757319610716e-06,
"loss": 0.9839523434638977,
"step": 190
},
{
"epoch": 0.42572062084257206,
"grad_norm": 2.580411672592163,
"learning_rate": 4.962267122359835e-06,
"loss": 0.8509742617607117,
"step": 192
},
{
"epoch": 0.43015521064301554,
"grad_norm": 1.3341411352157593,
"learning_rate": 4.960747161739931e-06,
"loss": 1.269567608833313,
"step": 194
},
{
"epoch": 0.43458980044345896,
"grad_norm": 1.2975329160690308,
"learning_rate": 4.9591974582002324e-06,
"loss": 1.5401256084442139,
"step": 196
},
{
"epoch": 0.43902439024390244,
"grad_norm": 2.489485740661621,
"learning_rate": 4.957618032590118e-06,
"loss": 1.2710596323013306,
"step": 198
},
{
"epoch": 0.4434589800443459,
"grad_norm": 0.6644977927207947,
"learning_rate": 4.956008906158842e-06,
"loss": 1.1192097663879395,
"step": 200
},
{
"epoch": 0.44789356984478934,
"grad_norm": 1.3568873405456543,
"learning_rate": 4.954370100555249e-06,
"loss": 1.2545751333236694,
"step": 202
},
{
"epoch": 0.4523281596452328,
"grad_norm": 1.7675485610961914,
"learning_rate": 4.952701637827476e-06,
"loss": 1.2129216194152832,
"step": 204
},
{
"epoch": 0.4567627494456763,
"grad_norm": 0.9738250374794006,
"learning_rate": 4.951003540422668e-06,
"loss": 1.0656797885894775,
"step": 206
},
{
"epoch": 0.4611973392461197,
"grad_norm": 1.6657801866531372,
"learning_rate": 4.949275831186663e-06,
"loss": 1.0429651737213135,
"step": 208
},
{
"epoch": 0.4656319290465632,
"grad_norm": 2.380303382873535,
"learning_rate": 4.947518533363691e-06,
"loss": 0.6534411311149597,
"step": 210
},
{
"epoch": 0.4700665188470067,
"grad_norm": 3.069462299346924,
"learning_rate": 4.945731670596062e-06,
"loss": 0.8374763131141663,
"step": 212
},
{
"epoch": 0.4745011086474501,
"grad_norm": 0.9472932815551758,
"learning_rate": 4.943915266923845e-06,
"loss": 1.0585463047027588,
"step": 214
},
{
"epoch": 0.4789356984478936,
"grad_norm": 0.9892237186431885,
"learning_rate": 4.942069346784547e-06,
"loss": 1.0768964290618896,
"step": 216
},
{
"epoch": 0.48337028824833705,
"grad_norm": 0.8981955051422119,
"learning_rate": 4.940193935012785e-06,
"loss": 1.110266923904419,
"step": 218
},
{
"epoch": 0.4878048780487805,
"grad_norm": 4.676793575286865,
"learning_rate": 4.938289056839946e-06,
"loss": 1.206817865371704,
"step": 220
},
{
"epoch": 0.49223946784922396,
"grad_norm": 1.3366749286651611,
"learning_rate": 4.936354737893854e-06,
"loss": 1.227997899055481,
"step": 222
},
{
"epoch": 0.49667405764966743,
"grad_norm": 0.8297674059867859,
"learning_rate": 4.934391004198424e-06,
"loss": 1.1951391696929932,
"step": 224
},
{
"epoch": 0.5011086474501109,
"grad_norm": 0.9005066156387329,
"learning_rate": 4.932397882173307e-06,
"loss": 1.2028762102127075,
"step": 226
},
{
"epoch": 0.5055432372505543,
"grad_norm": 0.83659428358078,
"learning_rate": 4.930375398633543e-06,
"loss": 1.2505710124969482,
"step": 228
},
{
"epoch": 0.5099778270509978,
"grad_norm": 1.2538983821868896,
"learning_rate": 4.928323580789192e-06,
"loss": 1.842966914176941,
"step": 230
},
{
"epoch": 0.5144124168514412,
"grad_norm": 1.6254911422729492,
"learning_rate": 4.926242456244973e-06,
"loss": 0.8636565804481506,
"step": 232
},
{
"epoch": 0.5188470066518847,
"grad_norm": 1.2163554430007935,
"learning_rate": 4.924132052999892e-06,
"loss": 1.2652028799057007,
"step": 234
},
{
"epoch": 0.5232815964523282,
"grad_norm": 0.7137370109558105,
"learning_rate": 4.921992399446861e-06,
"loss": 0.9469624757766724,
"step": 236
},
{
"epoch": 0.5277161862527716,
"grad_norm": 4.001765727996826,
"learning_rate": 4.919823524372323e-06,
"loss": 0.9857266545295715,
"step": 238
},
{
"epoch": 0.532150776053215,
"grad_norm": 1.0323377847671509,
"learning_rate": 4.91762545695586e-06,
"loss": 1.258691430091858,
"step": 240
},
{
"epoch": 0.5365853658536586,
"grad_norm": 0.9117689728736877,
"learning_rate": 4.9153982267698e-06,
"loss": 1.3205041885375977,
"step": 242
},
{
"epoch": 0.541019955654102,
"grad_norm": 3.387061357498169,
"learning_rate": 4.913141863778822e-06,
"loss": 0.9653130173683167,
"step": 244
},
{
"epoch": 0.5454545454545454,
"grad_norm": 1.0343185663223267,
"learning_rate": 4.910856398339553e-06,
"loss": 1.261291742324829,
"step": 246
},
{
"epoch": 0.549889135254989,
"grad_norm": 1.0249489545822144,
"learning_rate": 4.9085418612001545e-06,
"loss": 1.4818992614746094,
"step": 248
},
{
"epoch": 0.5543237250554324,
"grad_norm": 1.422224521636963,
"learning_rate": 4.906198283499916e-06,
"loss": 1.219714879989624,
"step": 250
},
{
"epoch": 0.5587583148558758,
"grad_norm": 4.456541538238525,
"learning_rate": 4.903825696768829e-06,
"loss": 0.6861237287521362,
"step": 252
},
{
"epoch": 0.5631929046563193,
"grad_norm": 2.3371129035949707,
"learning_rate": 4.901424132927172e-06,
"loss": 1.4689810276031494,
"step": 254
},
{
"epoch": 0.5676274944567627,
"grad_norm": 1.2416448593139648,
"learning_rate": 4.898993624285069e-06,
"loss": 1.321006417274475,
"step": 256
},
{
"epoch": 0.5720620842572062,
"grad_norm": 0.8489322662353516,
"learning_rate": 4.896534203542062e-06,
"loss": 1.2481799125671387,
"step": 258
},
{
"epoch": 0.5764966740576497,
"grad_norm": 1.086107611656189,
"learning_rate": 4.894045903786675e-06,
"loss": 1.280672311782837,
"step": 260
},
{
"epoch": 0.5809312638580931,
"grad_norm": 2.334581136703491,
"learning_rate": 4.891528758495961e-06,
"loss": 0.7310733199119568,
"step": 262
},
{
"epoch": 0.5853658536585366,
"grad_norm": 1.0172513723373413,
"learning_rate": 4.888982801535053e-06,
"loss": 1.364458680152893,
"step": 264
},
{
"epoch": 0.5898004434589801,
"grad_norm": 1.2178974151611328,
"learning_rate": 4.886408067156712e-06,
"loss": 1.048864722251892,
"step": 266
},
{
"epoch": 0.5942350332594235,
"grad_norm": 1.2515499591827393,
"learning_rate": 4.883804590000865e-06,
"loss": 1.5365700721740723,
"step": 268
},
{
"epoch": 0.5986696230598669,
"grad_norm": 2.658390998840332,
"learning_rate": 4.881172405094138e-06,
"loss": 1.1716382503509521,
"step": 270
},
{
"epoch": 0.6031042128603105,
"grad_norm": 0.843982458114624,
"learning_rate": 4.878511547849383e-06,
"loss": 1.2317594289779663,
"step": 272
},
{
"epoch": 0.6075388026607539,
"grad_norm": 2.4913628101348877,
"learning_rate": 4.875822054065203e-06,
"loss": 1.2178195714950562,
"step": 274
},
{
"epoch": 0.6119733924611973,
"grad_norm": 0.900059163570404,
"learning_rate": 4.8731039599254754e-06,
"loss": 1.2188351154327393,
"step": 276
},
{
"epoch": 0.6164079822616408,
"grad_norm": 4.912473678588867,
"learning_rate": 4.870357301998856e-06,
"loss": 1.2045741081237793,
"step": 278
},
{
"epoch": 0.6208425720620843,
"grad_norm": 0.8861173987388611,
"learning_rate": 4.867582117238294e-06,
"loss": 1.2807667255401611,
"step": 280
},
{
"epoch": 0.6252771618625277,
"grad_norm": 13.176647186279297,
"learning_rate": 4.864778442980532e-06,
"loss": 0.8604080677032471,
"step": 282
},
{
"epoch": 0.6297117516629712,
"grad_norm": 0.9435498118400574,
"learning_rate": 4.861946316945605e-06,
"loss": 1.248250961303711,
"step": 284
},
{
"epoch": 0.6341463414634146,
"grad_norm": 1.3723853826522827,
"learning_rate": 4.859085777236331e-06,
"loss": 1.3286021947860718,
"step": 286
},
{
"epoch": 0.6385809312638581,
"grad_norm": 0.926493227481842,
"learning_rate": 4.8561968623377985e-06,
"loss": 1.2437561750411987,
"step": 288
},
{
"epoch": 0.6430155210643016,
"grad_norm": 0.6830118894577026,
"learning_rate": 4.853279611116852e-06,
"loss": 1.2304435968399048,
"step": 290
},
{
"epoch": 0.647450110864745,
"grad_norm": 1.291746973991394,
"learning_rate": 4.850334062821566e-06,
"loss": 1.350826382637024,
"step": 292
},
{
"epoch": 0.6518847006651884,
"grad_norm": 3.118695020675659,
"learning_rate": 4.8473602570807185e-06,
"loss": 0.9130444526672363,
"step": 294
},
{
"epoch": 0.656319290465632,
"grad_norm": 1.9869314432144165,
"learning_rate": 4.844358233903254e-06,
"loss": 0.9922571778297424,
"step": 296
},
{
"epoch": 0.6607538802660754,
"grad_norm": 1.162611961364746,
"learning_rate": 4.841328033677753e-06,
"loss": 1.2070796489715576,
"step": 298
},
{
"epoch": 0.6651884700665188,
"grad_norm": 0.9664815664291382,
"learning_rate": 4.83826969717188e-06,
"loss": 1.2433284521102905,
"step": 300
},
{
"epoch": 0.6696230598669624,
"grad_norm": 0.793887197971344,
"learning_rate": 4.835183265531843e-06,
"loss": 1.1951156854629517,
"step": 302
},
{
"epoch": 0.6740576496674058,
"grad_norm": 1.1093353033065796,
"learning_rate": 4.832068780281831e-06,
"loss": 1.2604377269744873,
"step": 304
},
{
"epoch": 0.6784922394678492,
"grad_norm": 1.292920470237732,
"learning_rate": 4.828926283323464e-06,
"loss": 1.2320196628570557,
"step": 306
},
{
"epoch": 0.6829268292682927,
"grad_norm": 0.9268571138381958,
"learning_rate": 4.8257558169352254e-06,
"loss": 0.8704907894134521,
"step": 308
},
{
"epoch": 0.6873614190687362,
"grad_norm": 1.2235312461853027,
"learning_rate": 4.8225574237718906e-06,
"loss": 1.2371546030044556,
"step": 310
},
{
"epoch": 0.6917960088691796,
"grad_norm": 0.9682984352111816,
"learning_rate": 4.819331146863958e-06,
"loss": 1.2055768966674805,
"step": 312
},
{
"epoch": 0.6962305986696231,
"grad_norm": 0.6748729348182678,
"learning_rate": 4.8160770296170685e-06,
"loss": 1.225193977355957,
"step": 314
},
{
"epoch": 0.7006651884700665,
"grad_norm": 1.018369197845459,
"learning_rate": 4.812795115811419e-06,
"loss": 1.330003261566162,
"step": 316
},
{
"epoch": 0.70509977827051,
"grad_norm": 1.0128453969955444,
"learning_rate": 4.809485449601177e-06,
"loss": 0.991382360458374,
"step": 318
},
{
"epoch": 0.7095343680709535,
"grad_norm": 1.3138883113861084,
"learning_rate": 4.806148075513883e-06,
"loss": 0.9641591310501099,
"step": 320
},
{
"epoch": 0.7139689578713969,
"grad_norm": 1.802852988243103,
"learning_rate": 4.802783038449857e-06,
"loss": 1.0050233602523804,
"step": 322
},
{
"epoch": 0.7184035476718403,
"grad_norm": 1.2100204229354858,
"learning_rate": 4.799390383681587e-06,
"loss": 1.0627820491790771,
"step": 324
},
{
"epoch": 0.7228381374722838,
"grad_norm": 2.972120761871338,
"learning_rate": 4.795970156853124e-06,
"loss": 1.1004713773727417,
"step": 326
},
{
"epoch": 0.7272727272727273,
"grad_norm": 1.5077686309814453,
"learning_rate": 4.792522403979471e-06,
"loss": 0.8185582160949707,
"step": 328
},
{
"epoch": 0.7317073170731707,
"grad_norm": 1.4305667877197266,
"learning_rate": 4.789047171445957e-06,
"loss": 0.8279497623443604,
"step": 330
},
{
"epoch": 0.7361419068736141,
"grad_norm": 3.0829317569732666,
"learning_rate": 4.785544506007619e-06,
"loss": 1.3781704902648926,
"step": 332
},
{
"epoch": 0.7405764966740577,
"grad_norm": 5.511263370513916,
"learning_rate": 4.782014454788566e-06,
"loss": 1.190948724746704,
"step": 334
},
{
"epoch": 0.7450110864745011,
"grad_norm": 0.6818461418151855,
"learning_rate": 4.778457065281355e-06,
"loss": 1.2204951047897339,
"step": 336
},
{
"epoch": 0.7494456762749445,
"grad_norm": 8.291729927062988,
"learning_rate": 4.774872385346345e-06,
"loss": 0.6817957162857056,
"step": 338
},
{
"epoch": 0.753880266075388,
"grad_norm": 3.502612590789795,
"learning_rate": 4.7712604632110524e-06,
"loss": 0.32471054792404175,
"step": 340
},
{
"epoch": 0.7583148558758315,
"grad_norm": 1.0008634328842163,
"learning_rate": 4.767621347469506e-06,
"loss": 0.8657422065734863,
"step": 342
},
{
"epoch": 0.7627494456762749,
"grad_norm": 1.1472429037094116,
"learning_rate": 4.7639550870815895e-06,
"loss": 1.2735161781311035,
"step": 344
},
{
"epoch": 0.7671840354767184,
"grad_norm": 0.7090339660644531,
"learning_rate": 4.760261731372388e-06,
"loss": 1.225376844406128,
"step": 346
},
{
"epoch": 0.7716186252771619,
"grad_norm": 0.8595665693283081,
"learning_rate": 4.75654133003152e-06,
"loss": 1.0870047807693481,
"step": 348
},
{
"epoch": 0.7760532150776053,
"grad_norm": 1.020011305809021,
"learning_rate": 4.752793933112469e-06,
"loss": 1.2136049270629883,
"step": 350
},
{
"epoch": 0.7804878048780488,
"grad_norm": 2.301769971847534,
"learning_rate": 4.749019591031914e-06,
"loss": 1.139218807220459,
"step": 352
},
{
"epoch": 0.7849223946784922,
"grad_norm": 1.7465238571166992,
"learning_rate": 4.745218354569045e-06,
"loss": 1.0103105306625366,
"step": 354
},
{
"epoch": 0.7893569844789357,
"grad_norm": 6.574288368225098,
"learning_rate": 4.741390274864885e-06,
"loss": 1.2062402963638306,
"step": 356
},
{
"epoch": 0.7937915742793792,
"grad_norm": 2.9775516986846924,
"learning_rate": 4.737535403421601e-06,
"loss": 1.251714825630188,
"step": 358
},
{
"epoch": 0.7982261640798226,
"grad_norm": 1.402034044265747,
"learning_rate": 4.733653792101809e-06,
"loss": 1.215922474861145,
"step": 360
},
{
"epoch": 0.802660753880266,
"grad_norm": 1.4241507053375244,
"learning_rate": 4.729745493127878e-06,
"loss": 0.5939327478408813,
"step": 362
},
{
"epoch": 0.8070953436807096,
"grad_norm": 2.221510410308838,
"learning_rate": 4.725810559081227e-06,
"loss": 1.318565845489502,
"step": 364
},
{
"epoch": 0.811529933481153,
"grad_norm": 1.5592412948608398,
"learning_rate": 4.7218490429016175e-06,
"loss": 1.1835287809371948,
"step": 366
},
{
"epoch": 0.8159645232815964,
"grad_norm": 3.7534544467926025,
"learning_rate": 4.717860997886442e-06,
"loss": 0.9975278973579407,
"step": 368
},
{
"epoch": 0.8203991130820399,
"grad_norm": 5.241509437561035,
"learning_rate": 4.713846477690005e-06,
"loss": 0.8571419715881348,
"step": 370
},
{
"epoch": 0.8248337028824834,
"grad_norm": 1.2738577127456665,
"learning_rate": 4.709805536322804e-06,
"loss": 1.2139989137649536,
"step": 372
},
{
"epoch": 0.8292682926829268,
"grad_norm": 0.9885940551757812,
"learning_rate": 4.7057382281508e-06,
"loss": 1.2157939672470093,
"step": 374
},
{
"epoch": 0.8337028824833703,
"grad_norm": 1.920699119567871,
"learning_rate": 4.701644607894687e-06,
"loss": 1.1778194904327393,
"step": 376
},
{
"epoch": 0.8381374722838137,
"grad_norm": 1.2708183526992798,
"learning_rate": 4.697524730629159e-06,
"loss": 1.202418327331543,
"step": 378
},
{
"epoch": 0.8425720620842572,
"grad_norm": 8.80882740020752,
"learning_rate": 4.693378651782162e-06,
"loss": 0.723749041557312,
"step": 380
},
{
"epoch": 0.8470066518847007,
"grad_norm": 0.7982382774353027,
"learning_rate": 4.689206427134155e-06,
"loss": 1.2849069833755493,
"step": 382
},
{
"epoch": 0.8514412416851441,
"grad_norm": 0.8297720551490784,
"learning_rate": 4.6850081128173595e-06,
"loss": 1.1224342584609985,
"step": 384
},
{
"epoch": 0.8558758314855875,
"grad_norm": 0.8830884099006653,
"learning_rate": 4.680783765314994e-06,
"loss": 1.2344768047332764,
"step": 386
},
{
"epoch": 0.8603104212860311,
"grad_norm": 0.6799097657203674,
"learning_rate": 4.6765334414605315e-06,
"loss": 1.1405550241470337,
"step": 388
},
{
"epoch": 0.8647450110864745,
"grad_norm": 0.9640771746635437,
"learning_rate": 4.672257198436918e-06,
"loss": 1.23441481590271,
"step": 390
},
{
"epoch": 0.8691796008869179,
"grad_norm": 1.577943205833435,
"learning_rate": 4.667955093775814e-06,
"loss": 0.8823462128639221,
"step": 392
},
{
"epoch": 0.8736141906873615,
"grad_norm": 1.0975431203842163,
"learning_rate": 4.663627185356818e-06,
"loss": 1.201162576675415,
"step": 394
},
{
"epoch": 0.8780487804878049,
"grad_norm": 0.8264410495758057,
"learning_rate": 4.65927353140668e-06,
"loss": 1.1939952373504639,
"step": 396
},
{
"epoch": 0.8824833702882483,
"grad_norm": 1.166627049446106,
"learning_rate": 4.654894190498534e-06,
"loss": 1.1967897415161133,
"step": 398
},
{
"epoch": 0.8869179600886918,
"grad_norm": 0.6226066946983337,
"learning_rate": 4.650489221551095e-06,
"loss": 0.44957131147384644,
"step": 400
},
{
"epoch": 0.8913525498891353,
"grad_norm": 0.6761036515235901,
"learning_rate": 4.646058683827874e-06,
"loss": 1.025883436203003,
"step": 402
},
{
"epoch": 0.8957871396895787,
"grad_norm": 0.2941175103187561,
"learning_rate": 4.641602636936378e-06,
"loss": 0.9108548164367676,
"step": 404
},
{
"epoch": 0.9002217294900222,
"grad_norm": 1.1719696521759033,
"learning_rate": 4.637121140827311e-06,
"loss": 1.254028081893921,
"step": 406
},
{
"epoch": 0.9046563192904656,
"grad_norm": 2.666060447692871,
"learning_rate": 4.632614255793762e-06,
"loss": 1.1204774379730225,
"step": 408
},
{
"epoch": 0.9090909090909091,
"grad_norm": 3.2736313343048096,
"learning_rate": 4.6280820424704e-06,
"loss": 1.0682605504989624,
"step": 410
},
{
"epoch": 0.9135254988913526,
"grad_norm": 1.8158552646636963,
"learning_rate": 4.623524561832653e-06,
"loss": 1.22024667263031,
"step": 412
},
{
"epoch": 0.917960088691796,
"grad_norm": 1.1710323095321655,
"learning_rate": 4.618941875195893e-06,
"loss": 1.233978033065796,
"step": 414
},
{
"epoch": 0.9223946784922394,
"grad_norm": 1.603824257850647,
"learning_rate": 4.614334044214606e-06,
"loss": 0.9593857526779175,
"step": 416
},
{
"epoch": 0.926829268292683,
"grad_norm": 0.8521295189857483,
"learning_rate": 4.6097011308815645e-06,
"loss": 1.2425235509872437,
"step": 418
},
{
"epoch": 0.9312638580931264,
"grad_norm": 2.692030429840088,
"learning_rate": 4.605043197526996e-06,
"loss": 0.7656779289245605,
"step": 420
},
{
"epoch": 0.9356984478935698,
"grad_norm": 1.0381312370300293,
"learning_rate": 4.600360306817738e-06,
"loss": 1.4421385526657104,
"step": 422
},
{
"epoch": 0.9401330376940134,
"grad_norm": 21.29833984375,
"learning_rate": 4.595652521756403e-06,
"loss": 1.0444769859313965,
"step": 424
},
{
"epoch": 0.9445676274944568,
"grad_norm": 0.7946411967277527,
"learning_rate": 4.590919905680524e-06,
"loss": 1.1843209266662598,
"step": 426
},
{
"epoch": 0.9490022172949002,
"grad_norm": 0.4803466796875,
"learning_rate": 4.5861625222617065e-06,
"loss": 1.0634812116622925,
"step": 428
},
{
"epoch": 0.9534368070953437,
"grad_norm": 6.066927909851074,
"learning_rate": 4.58138043550477e-06,
"loss": 0.6745222806930542,
"step": 430
},
{
"epoch": 0.9578713968957872,
"grad_norm": 1.7354466915130615,
"learning_rate": 4.576573709746887e-06,
"loss": 1.2073925733566284,
"step": 432
},
{
"epoch": 0.9623059866962306,
"grad_norm": 2.441338300704956,
"learning_rate": 4.5717424096567205e-06,
"loss": 1.0565319061279297,
"step": 434
},
{
"epoch": 0.9667405764966741,
"grad_norm": 0.7328557968139648,
"learning_rate": 4.566886600233547e-06,
"loss": 1.2665467262268066,
"step": 436
},
{
"epoch": 0.9711751662971175,
"grad_norm": 0.6606398820877075,
"learning_rate": 4.56200634680639e-06,
"loss": 1.2194955348968506,
"step": 438
},
{
"epoch": 0.975609756097561,
"grad_norm": 3.465074062347412,
"learning_rate": 4.557101715033136e-06,
"loss": 0.7741403579711914,
"step": 440
},
{
"epoch": 0.9800443458980045,
"grad_norm": 0.9509384632110596,
"learning_rate": 4.552172770899652e-06,
"loss": 1.027595043182373,
"step": 442
},
{
"epoch": 0.9844789356984479,
"grad_norm": 1.1520196199417114,
"learning_rate": 4.547219580718899e-06,
"loss": 1.2680723667144775,
"step": 444
},
{
"epoch": 0.9889135254988913,
"grad_norm": 0.7497501373291016,
"learning_rate": 4.542242211130039e-06,
"loss": 1.1927207708358765,
"step": 446
},
{
"epoch": 0.9933481152993349,
"grad_norm": 0.9399945139884949,
"learning_rate": 4.537240729097539e-06,
"loss": 1.2298150062561035,
"step": 448
},
{
"epoch": 0.9977827050997783,
"grad_norm": 0.6698287129402161,
"learning_rate": 4.532215201910269e-06,
"loss": 1.0293582677841187,
"step": 450
},
{
"epoch": 1.0022172949002217,
"grad_norm": 6.488946914672852,
"learning_rate": 4.527165697180598e-06,
"loss": 1.0618946552276611,
"step": 452
},
{
"epoch": 1.0066518847006651,
"grad_norm": 1.9061259031295776,
"learning_rate": 4.522092282843481e-06,
"loss": 1.3013073205947876,
"step": 454
},
{
"epoch": 1.0110864745011086,
"grad_norm": 0.9116241335868835,
"learning_rate": 4.516995027155554e-06,
"loss": 1.2355761528015137,
"step": 456
},
{
"epoch": 1.0155210643015522,
"grad_norm": 0.6007906198501587,
"learning_rate": 4.511873998694204e-06,
"loss": 0.8137620687484741,
"step": 458
},
{
"epoch": 1.0199556541019956,
"grad_norm": 0.37421897053718567,
"learning_rate": 4.506729266356651e-06,
"loss": 0.8346569538116455,
"step": 460
},
{
"epoch": 1.024390243902439,
"grad_norm": 3.830796718597412,
"learning_rate": 4.5015608993590276e-06,
"loss": 0.5353830456733704,
"step": 462
},
{
"epoch": 1.0288248337028825,
"grad_norm": 1.997052550315857,
"learning_rate": 4.4963689672354375e-06,
"loss": 0.8425235152244568,
"step": 464
},
{
"epoch": 1.033259423503326,
"grad_norm": 1.2142010927200317,
"learning_rate": 4.491153539837026e-06,
"loss": 0.7943000793457031,
"step": 466
},
{
"epoch": 1.0376940133037693,
"grad_norm": 3.276048183441162,
"learning_rate": 4.4859146873310375e-06,
"loss": 0.9381541013717651,
"step": 468
},
{
"epoch": 1.042128603104213,
"grad_norm": 0.9781183004379272,
"learning_rate": 4.480652480199873e-06,
"loss": 0.6260444521903992,
"step": 470
},
{
"epoch": 1.0465631929046564,
"grad_norm": 1.1556280851364136,
"learning_rate": 4.475366989240147e-06,
"loss": 0.9917138814926147,
"step": 472
},
{
"epoch": 1.0509977827050998,
"grad_norm": 0.9337510466575623,
"learning_rate": 4.470058285561721e-06,
"loss": 1.046325445175171,
"step": 474
},
{
"epoch": 1.0554323725055432,
"grad_norm": 1.022441029548645,
"learning_rate": 4.464726440586761e-06,
"loss": 1.0449920892715454,
"step": 476
},
{
"epoch": 1.0598669623059866,
"grad_norm": 1.4056508541107178,
"learning_rate": 4.45937152604877e-06,
"loss": 1.094570517539978,
"step": 478
},
{
"epoch": 1.06430155210643,
"grad_norm": 1.2816241979599,
"learning_rate": 4.453993613991622e-06,
"loss": 0.6536943316459656,
"step": 480
},
{
"epoch": 1.0687361419068737,
"grad_norm": 1.1188907623291016,
"learning_rate": 4.4485927767685995e-06,
"loss": 1.139634609222412,
"step": 482
},
{
"epoch": 1.0731707317073171,
"grad_norm": 0.4301467835903168,
"learning_rate": 4.443169087041409e-06,
"loss": 0.923588216304779,
"step": 484
},
{
"epoch": 1.0776053215077606,
"grad_norm": 0.6617166996002197,
"learning_rate": 4.4377226177792145e-06,
"loss": 1.0248123407363892,
"step": 486
},
{
"epoch": 1.082039911308204,
"grad_norm": 1.1432987451553345,
"learning_rate": 4.432253442257649e-06,
"loss": 0.7850465774536133,
"step": 488
},
{
"epoch": 1.0864745011086474,
"grad_norm": 1.8113877773284912,
"learning_rate": 4.426761634057831e-06,
"loss": 0.9420559406280518,
"step": 490
},
{
"epoch": 1.0909090909090908,
"grad_norm": 1.6748836040496826,
"learning_rate": 4.421247267065375e-06,
"loss": 1.1039046049118042,
"step": 492
},
{
"epoch": 1.0953436807095343,
"grad_norm": 1.795003056526184,
"learning_rate": 4.415710415469394e-06,
"loss": 1.091720461845398,
"step": 494
},
{
"epoch": 1.099778270509978,
"grad_norm": 1.652651309967041,
"learning_rate": 4.410151153761506e-06,
"loss": 0.8766781091690063,
"step": 496
},
{
"epoch": 1.1042128603104213,
"grad_norm": 0.6152821183204651,
"learning_rate": 4.404569556734832e-06,
"loss": 1.0317732095718384,
"step": 498
},
{
"epoch": 1.1086474501108647,
"grad_norm": 0.956628680229187,
"learning_rate": 4.398965699482984e-06,
"loss": 0.8593730926513672,
"step": 500
},
{
"epoch": 1.1130820399113082,
"grad_norm": 0.9794469475746155,
"learning_rate": 4.39333965739906e-06,
"loss": 1.025899887084961,
"step": 502
},
{
"epoch": 1.1175166297117516,
"grad_norm": 2.0585477352142334,
"learning_rate": 4.3876915061746275e-06,
"loss": 0.928720235824585,
"step": 504
},
{
"epoch": 1.1219512195121952,
"grad_norm": 1.5161011219024658,
"learning_rate": 4.382021321798707e-06,
"loss": 1.0593522787094116,
"step": 506
},
{
"epoch": 1.1263858093126387,
"grad_norm": 1.4959526062011719,
"learning_rate": 4.376329180556745e-06,
"loss": 0.8792567849159241,
"step": 508
},
{
"epoch": 1.130820399113082,
"grad_norm": 0.7404501438140869,
"learning_rate": 4.370615159029594e-06,
"loss": 1.131915807723999,
"step": 510
},
{
"epoch": 1.1352549889135255,
"grad_norm": 8.584444999694824,
"learning_rate": 4.36487933409248e-06,
"loss": 0.5437202453613281,
"step": 512
},
{
"epoch": 1.139689578713969,
"grad_norm": 1.0118581056594849,
"learning_rate": 4.359121782913964e-06,
"loss": 0.48725366592407227,
"step": 514
},
{
"epoch": 1.1441241685144123,
"grad_norm": 1.0809403657913208,
"learning_rate": 4.3533425829549085e-06,
"loss": 0.9849007725715637,
"step": 516
},
{
"epoch": 1.1485587583148558,
"grad_norm": 2.9192471504211426,
"learning_rate": 4.347541811967436e-06,
"loss": 1.1810388565063477,
"step": 518
},
{
"epoch": 1.1529933481152994,
"grad_norm": 1.2919275760650635,
"learning_rate": 4.341719547993879e-06,
"loss": 1.126792550086975,
"step": 520
},
{
"epoch": 1.1574279379157428,
"grad_norm": 1.4183093309402466,
"learning_rate": 4.335875869365732e-06,
"loss": 0.5291884541511536,
"step": 522
},
{
"epoch": 1.1618625277161863,
"grad_norm": 1.3505902290344238,
"learning_rate": 4.330010854702598e-06,
"loss": 1.0048887729644775,
"step": 524
},
{
"epoch": 1.1662971175166297,
"grad_norm": 1.1992731094360352,
"learning_rate": 4.3241245829111324e-06,
"loss": 1.1825058460235596,
"step": 526
},
{
"epoch": 1.170731707317073,
"grad_norm": 1.0667892694473267,
"learning_rate": 4.318217133183978e-06,
"loss": 0.6172074675559998,
"step": 528
},
{
"epoch": 1.1751662971175167,
"grad_norm": 0.6770420074462891,
"learning_rate": 4.312288584998697e-06,
"loss": 0.8084365129470825,
"step": 530
},
{
"epoch": 1.1796008869179602,
"grad_norm": 0.7109060883522034,
"learning_rate": 4.306339018116714e-06,
"loss": 0.9454807043075562,
"step": 532
},
{
"epoch": 1.1840354767184036,
"grad_norm": 0.7484174370765686,
"learning_rate": 4.300368512582227e-06,
"loss": 1.0386492013931274,
"step": 534
},
{
"epoch": 1.188470066518847,
"grad_norm": 0.4899117946624756,
"learning_rate": 4.294377148721144e-06,
"loss": 0.8095374703407288,
"step": 536
},
{
"epoch": 1.1929046563192904,
"grad_norm": 1.52462899684906,
"learning_rate": 4.288365007139991e-06,
"loss": 1.0734546184539795,
"step": 538
},
{
"epoch": 1.1973392461197339,
"grad_norm": 3.1415209770202637,
"learning_rate": 4.2823321687248386e-06,
"loss": 0.6387287378311157,
"step": 540
},
{
"epoch": 1.2017738359201773,
"grad_norm": 2.35459303855896,
"learning_rate": 4.276278714640203e-06,
"loss": 0.6681326627731323,
"step": 542
},
{
"epoch": 1.206208425720621,
"grad_norm": 2.5117621421813965,
"learning_rate": 4.270204726327963e-06,
"loss": 1.1640082597732544,
"step": 544
},
{
"epoch": 1.2106430155210643,
"grad_norm": 0.46196478605270386,
"learning_rate": 4.264110285506259e-06,
"loss": 0.8398973941802979,
"step": 546
},
{
"epoch": 1.2150776053215078,
"grad_norm": 1.005468726158142,
"learning_rate": 4.257995474168395e-06,
"loss": 1.2058871984481812,
"step": 548
},
{
"epoch": 1.2195121951219512,
"grad_norm": 6.870185852050781,
"learning_rate": 4.251860374581736e-06,
"loss": 0.5818393230438232,
"step": 550
},
{
"epoch": 1.2239467849223946,
"grad_norm": 1.6000272035598755,
"learning_rate": 4.245705069286601e-06,
"loss": 1.0754560232162476,
"step": 552
},
{
"epoch": 1.2283813747228383,
"grad_norm": 6.755113124847412,
"learning_rate": 4.239529641095149e-06,
"loss": 0.7084805965423584,
"step": 554
},
{
"epoch": 1.2328159645232817,
"grad_norm": 0.39095139503479004,
"learning_rate": 4.233334173090274e-06,
"loss": 0.7805958986282349,
"step": 556
},
{
"epoch": 1.237250554323725,
"grad_norm": 2.378875255584717,
"learning_rate": 4.227118748624478e-06,
"loss": 0.844260036945343,
"step": 558
},
{
"epoch": 1.2416851441241685,
"grad_norm": 1.532080054283142,
"learning_rate": 4.220883451318753e-06,
"loss": 1.2891573905944824,
"step": 560
},
{
"epoch": 1.246119733924612,
"grad_norm": 4.011655330657959,
"learning_rate": 4.2146283650614545e-06,
"loss": 0.9298526048660278,
"step": 562
},
{
"epoch": 1.2505543237250554,
"grad_norm": 1.7476955652236938,
"learning_rate": 4.208353574007179e-06,
"loss": 0.6140811443328857,
"step": 564
},
{
"epoch": 1.2549889135254988,
"grad_norm": 0.7454321980476379,
"learning_rate": 4.202059162575622e-06,
"loss": 0.9352214932441711,
"step": 566
},
{
"epoch": 1.2594235033259422,
"grad_norm": 0.8128042221069336,
"learning_rate": 4.195745215450451e-06,
"loss": 0.8876186013221741,
"step": 568
},
{
"epoch": 1.2638580931263859,
"grad_norm": 0.8655123114585876,
"learning_rate": 4.189411817578159e-06,
"loss": 1.137557864189148,
"step": 570
},
{
"epoch": 1.2682926829268293,
"grad_norm": 0.7158671617507935,
"learning_rate": 4.1830590541669304e-06,
"loss": 0.73340904712677,
"step": 572
},
{
"epoch": 1.2727272727272727,
"grad_norm": 1.6793097257614136,
"learning_rate": 4.176687010685484e-06,
"loss": 1.366833209991455,
"step": 574
},
{
"epoch": 1.2771618625277161,
"grad_norm": 1.7137008905410767,
"learning_rate": 4.170295772861931e-06,
"loss": 1.051391363143921,
"step": 576
},
{
"epoch": 1.2815964523281598,
"grad_norm": 0.8713537454605103,
"learning_rate": 4.163885426682619e-06,
"loss": 1.1371718645095825,
"step": 578
},
{
"epoch": 1.2860310421286032,
"grad_norm": 0.9941657781600952,
"learning_rate": 4.157456058390977e-06,
"loss": 1.131047010421753,
"step": 580
},
{
"epoch": 1.2904656319290466,
"grad_norm": 2.815027952194214,
"learning_rate": 4.151007754486351e-06,
"loss": 1.3203078508377075,
"step": 582
},
{
"epoch": 1.29490022172949,
"grad_norm": 0.5185415148735046,
"learning_rate": 4.144540601722843e-06,
"loss": 0.7271389365196228,
"step": 584
},
{
"epoch": 1.2993348115299335,
"grad_norm": 0.6934378147125244,
"learning_rate": 4.138054687108143e-06,
"loss": 0.8247404098510742,
"step": 586
},
{
"epoch": 1.3037694013303769,
"grad_norm": 4.077749252319336,
"learning_rate": 4.131550097902361e-06,
"loss": 0.5715563297271729,
"step": 588
},
{
"epoch": 1.3082039911308203,
"grad_norm": 2.7654740810394287,
"learning_rate": 4.125026921616852e-06,
"loss": 0.9844896197319031,
"step": 590
},
{
"epoch": 1.3126385809312637,
"grad_norm": 1.3605704307556152,
"learning_rate": 4.118485246013031e-06,
"loss": 1.0650662183761597,
"step": 592
},
{
"epoch": 1.3170731707317074,
"grad_norm": 0.6844571828842163,
"learning_rate": 4.111925159101208e-06,
"loss": 1.0756279230117798,
"step": 594
},
{
"epoch": 1.3215077605321508,
"grad_norm": 0.6011850833892822,
"learning_rate": 4.1053467491393864e-06,
"loss": 1.0734484195709229,
"step": 596
},
{
"epoch": 1.3259423503325942,
"grad_norm": 1.0769504308700562,
"learning_rate": 4.098750104632091e-06,
"loss": 1.0580254793167114,
"step": 598
},
{
"epoch": 1.3303769401330376,
"grad_norm": 1.112815022468567,
"learning_rate": 4.092135314329165e-06,
"loss": 0.5043829083442688,
"step": 600
},
{
"epoch": 1.3348115299334813,
"grad_norm": 1.8235517740249634,
"learning_rate": 4.085502467224583e-06,
"loss": 0.9834141135215759,
"step": 602
},
{
"epoch": 1.3392461197339247,
"grad_norm": 0.8808879256248474,
"learning_rate": 4.078851652555254e-06,
"loss": 0.8073503971099854,
"step": 604
},
{
"epoch": 1.3436807095343681,
"grad_norm": 0.6147663593292236,
"learning_rate": 4.072182959799816e-06,
"loss": 0.8254027366638184,
"step": 606
},
{
"epoch": 1.3481152993348116,
"grad_norm": 2.649986743927002,
"learning_rate": 4.065496478677436e-06,
"loss": 1.089369297027588,
"step": 608
},
{
"epoch": 1.352549889135255,
"grad_norm": 9.459650993347168,
"learning_rate": 4.058792299146602e-06,
"loss": 1.1217542886734009,
"step": 610
},
{
"epoch": 1.3569844789356984,
"grad_norm": 1.946794033050537,
"learning_rate": 4.052070511403912e-06,
"loss": 0.6706306338310242,
"step": 612
},
{
"epoch": 1.3614190687361418,
"grad_norm": 0.9328871965408325,
"learning_rate": 4.045331205882863e-06,
"loss": 1.1241906881332397,
"step": 614
},
{
"epoch": 1.3658536585365852,
"grad_norm": 0.5184811949729919,
"learning_rate": 4.038574473252629e-06,
"loss": 0.712710976600647,
"step": 616
},
{
"epoch": 1.370288248337029,
"grad_norm": 1.2016468048095703,
"learning_rate": 4.031800404416849e-06,
"loss": 1.0043390989303589,
"step": 618
},
{
"epoch": 1.3747228381374723,
"grad_norm": 0.8439203500747681,
"learning_rate": 4.025009090512394e-06,
"loss": 0.08705577254295349,
"step": 620
},
{
"epoch": 1.3791574279379157,
"grad_norm": 0.5789482593536377,
"learning_rate": 4.018200622908153e-06,
"loss": 1.0090532302856445,
"step": 622
},
{
"epoch": 1.3835920177383592,
"grad_norm": 1.3330731391906738,
"learning_rate": 4.011375093203793e-06,
"loss": 1.034111738204956,
"step": 624
},
{
"epoch": 1.3880266075388026,
"grad_norm": 2.7468066215515137,
"learning_rate": 4.004532593228531e-06,
"loss": 0.962290346622467,
"step": 626
},
{
"epoch": 1.3924611973392462,
"grad_norm": 0.9098476767539978,
"learning_rate": 3.997673215039899e-06,
"loss": 1.0617891550064087,
"step": 628
},
{
"epoch": 1.3968957871396896,
"grad_norm": 2.727142333984375,
"learning_rate": 3.990797050922506e-06,
"loss": 1.072065830230713,
"step": 630
},
{
"epoch": 1.401330376940133,
"grad_norm": 0.8622321486473083,
"learning_rate": 3.9839041933867954e-06,
"loss": 0.8306987881660461,
"step": 632
},
{
"epoch": 1.4057649667405765,
"grad_norm": 1.51633882522583,
"learning_rate": 3.976994735167796e-06,
"loss": 0.9722529649734497,
"step": 634
},
{
"epoch": 1.41019955654102,
"grad_norm": 0.8044195771217346,
"learning_rate": 3.970068769223884e-06,
"loss": 1.1132667064666748,
"step": 636
},
{
"epoch": 1.4146341463414633,
"grad_norm": 6.232460021972656,
"learning_rate": 3.963126388735525e-06,
"loss": 0.82004714012146,
"step": 638
},
{
"epoch": 1.4190687361419068,
"grad_norm": 0.918382465839386,
"learning_rate": 3.956167687104021e-06,
"loss": 0.7192656993865967,
"step": 640
},
{
"epoch": 1.4235033259423504,
"grad_norm": 3.0365803241729736,
"learning_rate": 3.9491927579502584e-06,
"loss": 0.8175387382507324,
"step": 642
},
{
"epoch": 1.4279379157427938,
"grad_norm": 0.7935546636581421,
"learning_rate": 3.9422016951134415e-06,
"loss": 0.6935265064239502,
"step": 644
},
{
"epoch": 1.4323725055432373,
"grad_norm": 0.8461658954620361,
"learning_rate": 3.935194592649836e-06,
"loss": 1.2383686304092407,
"step": 646
},
{
"epoch": 1.4368070953436807,
"grad_norm": 0.6491639018058777,
"learning_rate": 3.928171544831501e-06,
"loss": 1.0858060121536255,
"step": 648
},
{
"epoch": 1.441241685144124,
"grad_norm": 1.7324135303497314,
"learning_rate": 3.921132646145019e-06,
"loss": 1.1424576044082642,
"step": 650
},
{
"epoch": 1.4456762749445677,
"grad_norm": 4.648967266082764,
"learning_rate": 3.914077991290232e-06,
"loss": 0.9448270201683044,
"step": 652
},
{
"epoch": 1.4501108647450112,
"grad_norm": 1.9809584617614746,
"learning_rate": 3.907007675178956e-06,
"loss": 1.05901038646698,
"step": 654
},
{
"epoch": 1.4545454545454546,
"grad_norm": 0.8580670356750488,
"learning_rate": 3.899921792933713e-06,
"loss": 0.8147414326667786,
"step": 656
},
{
"epoch": 1.458980044345898,
"grad_norm": 0.5463111996650696,
"learning_rate": 3.892820439886448e-06,
"loss": 0.7859951257705688,
"step": 658
},
{
"epoch": 1.4634146341463414,
"grad_norm": 2.406365394592285,
"learning_rate": 3.885703711577249e-06,
"loss": 1.037505865097046,
"step": 660
},
{
"epoch": 1.4678492239467849,
"grad_norm": 1.347562551498413,
"learning_rate": 3.8785717037530555e-06,
"loss": 1.0211855173110962,
"step": 662
},
{
"epoch": 1.4722838137472283,
"grad_norm": 0.9649538397789001,
"learning_rate": 3.871424512366377e-06,
"loss": 0.7922490239143372,
"step": 664
},
{
"epoch": 1.476718403547672,
"grad_norm": 0.6426183581352234,
"learning_rate": 3.864262233574e-06,
"loss": 0.6275416612625122,
"step": 666
},
{
"epoch": 1.4811529933481153,
"grad_norm": 2.499333143234253,
"learning_rate": 3.857084963735689e-06,
"loss": 0.6237704753875732,
"step": 668
},
{
"epoch": 1.4855875831485588,
"grad_norm": 0.956392765045166,
"learning_rate": 3.849892799412902e-06,
"loss": 0.9907911419868469,
"step": 670
},
{
"epoch": 1.4900221729490022,
"grad_norm": 1.0127171277999878,
"learning_rate": 3.84268583736748e-06,
"loss": 1.030133605003357,
"step": 672
},
{
"epoch": 1.4944567627494456,
"grad_norm": 3.757286548614502,
"learning_rate": 3.835464174560349e-06,
"loss": 0.6812074184417725,
"step": 674
},
{
"epoch": 1.4988913525498893,
"grad_norm": 2.872394561767578,
"learning_rate": 3.828227908150217e-06,
"loss": 0.7936379909515381,
"step": 676
},
{
"epoch": 1.5033259423503327,
"grad_norm": 4.6506123542785645,
"learning_rate": 3.820977135492266e-06,
"loss": 1.065293312072754,
"step": 678
},
{
"epoch": 1.507760532150776,
"grad_norm": 0.6903529167175293,
"learning_rate": 3.8137119541368415e-06,
"loss": 0.5796735882759094,
"step": 680
},
{
"epoch": 1.5121951219512195,
"grad_norm": 0.7172544002532959,
"learning_rate": 3.80643246182814e-06,
"loss": 0.5570440888404846,
"step": 682
},
{
"epoch": 1.516629711751663,
"grad_norm": 0.9278987646102905,
"learning_rate": 3.7991387565028963e-06,
"loss": 0.9937266111373901,
"step": 684
},
{
"epoch": 1.5210643015521064,
"grad_norm": 0.6855456829071045,
"learning_rate": 3.791830936289062e-06,
"loss": 0.991579532623291,
"step": 686
},
{
"epoch": 1.5254988913525498,
"grad_norm": 2.719144582748413,
"learning_rate": 3.784509099504488e-06,
"loss": 0.6759345531463623,
"step": 688
},
{
"epoch": 1.5299334811529932,
"grad_norm": 1.2889043092727661,
"learning_rate": 3.7771733446556025e-06,
"loss": 0.3256318271160126,
"step": 690
},
{
"epoch": 1.5343680709534369,
"grad_norm": 3.266566038131714,
"learning_rate": 3.7698237704360826e-06,
"loss": 0.770363986492157,
"step": 692
},
{
"epoch": 1.5388026607538803,
"grad_norm": 1.6477800607681274,
"learning_rate": 3.7624604757255297e-06,
"loss": 0.9243440628051758,
"step": 694
},
{
"epoch": 1.5432372505543237,
"grad_norm": 9.355045318603516,
"learning_rate": 3.7550835595881365e-06,
"loss": 0.6884693503379822,
"step": 696
},
{
"epoch": 1.5476718403547673,
"grad_norm": 0.7756222486495972,
"learning_rate": 3.747693121271355e-06,
"loss": 1.0920729637145996,
"step": 698
},
{
"epoch": 1.5521064301552108,
"grad_norm": 1.2968804836273193,
"learning_rate": 3.740289260204565e-06,
"loss": 1.0673658847808838,
"step": 700
},
{
"epoch": 1.5565410199556542,
"grad_norm": 1.133268117904663,
"learning_rate": 3.732872075997729e-06,
"loss": 1.0402312278747559,
"step": 702
},
{
"epoch": 1.5609756097560976,
"grad_norm": 1.2593156099319458,
"learning_rate": 3.725441668440058e-06,
"loss": 0.8764057159423828,
"step": 704
},
{
"epoch": 1.565410199556541,
"grad_norm": 2.54331111907959,
"learning_rate": 3.7179981374986683e-06,
"loss": 0.43431326746940613,
"step": 706
},
{
"epoch": 1.5698447893569845,
"grad_norm": 1.0765475034713745,
"learning_rate": 3.710541583317233e-06,
"loss": 1.0775222778320312,
"step": 708
},
{
"epoch": 1.5742793791574279,
"grad_norm": 3.7468576431274414,
"learning_rate": 3.70307210621464e-06,
"loss": 0.885796844959259,
"step": 710
},
{
"epoch": 1.5787139689578713,
"grad_norm": 7.981743335723877,
"learning_rate": 3.695589806683636e-06,
"loss": 0.6404878497123718,
"step": 712
},
{
"epoch": 1.5831485587583147,
"grad_norm": 0.843375027179718,
"learning_rate": 3.68809478538948e-06,
"loss": 1.0314189195632935,
"step": 714
},
{
"epoch": 1.5875831485587582,
"grad_norm": 1.4143157005310059,
"learning_rate": 3.6805871431685875e-06,
"loss": 1.1162283420562744,
"step": 716
},
{
"epoch": 1.5920177383592018,
"grad_norm": 1.6812629699707031,
"learning_rate": 3.6730669810271707e-06,
"loss": 1.224688172340393,
"step": 718
},
{
"epoch": 1.5964523281596452,
"grad_norm": 1.70530366897583,
"learning_rate": 3.665534400139885e-06,
"loss": 0.5482722520828247,
"step": 720
},
{
"epoch": 1.6008869179600886,
"grad_norm": 0.34640756249427795,
"learning_rate": 3.6579895018484635e-06,
"loss": 0.5939568877220154,
"step": 722
},
{
"epoch": 1.6053215077605323,
"grad_norm": 0.7377131581306458,
"learning_rate": 3.650432387660354e-06,
"loss": 1.0684458017349243,
"step": 724
},
{
"epoch": 1.6097560975609757,
"grad_norm": 0.7918015122413635,
"learning_rate": 3.6428631592473584e-06,
"loss": 1.0849140882492065,
"step": 726
},
{
"epoch": 1.6141906873614191,
"grad_norm": 0.6928529739379883,
"learning_rate": 3.6352819184442552e-06,
"loss": 0.7899186611175537,
"step": 728
},
{
"epoch": 1.6186252771618626,
"grad_norm": 4.092320442199707,
"learning_rate": 3.6276887672474374e-06,
"loss": 1.041346549987793,
"step": 730
},
{
"epoch": 1.623059866962306,
"grad_norm": 0.7040436863899231,
"learning_rate": 3.620083807813541e-06,
"loss": 1.0382251739501953,
"step": 732
},
{
"epoch": 1.6274944567627494,
"grad_norm": 1.0411463975906372,
"learning_rate": 3.6124671424580633e-06,
"loss": 1.0006555318832397,
"step": 734
},
{
"epoch": 1.6319290465631928,
"grad_norm": 0.41378095746040344,
"learning_rate": 3.604838873653991e-06,
"loss": 0.6166353821754456,
"step": 736
},
{
"epoch": 1.6363636363636362,
"grad_norm": 7.036901950836182,
"learning_rate": 3.597199104030424e-06,
"loss": 1.1837652921676636,
"step": 738
},
{
"epoch": 1.6407982261640797,
"grad_norm": 0.7952733039855957,
"learning_rate": 3.589547936371189e-06,
"loss": 1.0267434120178223,
"step": 740
},
{
"epoch": 1.6452328159645233,
"grad_norm": 2.4951283931732178,
"learning_rate": 3.58188547361346e-06,
"loss": 1.2115845680236816,
"step": 742
},
{
"epoch": 1.6496674057649667,
"grad_norm": 3.071096658706665,
"learning_rate": 3.574211818846374e-06,
"loss": 0.8251364231109619,
"step": 744
},
{
"epoch": 1.6541019955654102,
"grad_norm": 2.369654655456543,
"learning_rate": 3.566527075309641e-06,
"loss": 0.8760843276977539,
"step": 746
},
{
"epoch": 1.6585365853658538,
"grad_norm": 2.175527334213257,
"learning_rate": 3.558831346392159e-06,
"loss": 0.8911266922950745,
"step": 748
},
{
"epoch": 1.6629711751662972,
"grad_norm": 1.2222336530685425,
"learning_rate": 3.5511247356306205e-06,
"loss": 1.1212302446365356,
"step": 750
},
{
"epoch": 1.6674057649667406,
"grad_norm": 0.8364224433898926,
"learning_rate": 3.5434073467081183e-06,
"loss": 0.8240474462509155,
"step": 752
},
{
"epoch": 1.671840354767184,
"grad_norm": 2.646286964416504,
"learning_rate": 3.5356792834527533e-06,
"loss": 0.6273259520530701,
"step": 754
},
{
"epoch": 1.6762749445676275,
"grad_norm": 1.0103214979171753,
"learning_rate": 3.527940649836238e-06,
"loss": 0.9294993877410889,
"step": 756
},
{
"epoch": 1.680709534368071,
"grad_norm": 1.3476576805114746,
"learning_rate": 3.520191549972494e-06,
"loss": 0.9880755543708801,
"step": 758
},
{
"epoch": 1.6851441241685143,
"grad_norm": 2.302769899368286,
"learning_rate": 3.512432088116255e-06,
"loss": 0.8687876462936401,
"step": 760
},
{
"epoch": 1.6895787139689578,
"grad_norm": 1.9795390367507935,
"learning_rate": 3.5046623686616627e-06,
"loss": 1.0587050914764404,
"step": 762
},
{
"epoch": 1.6940133037694012,
"grad_norm": 1.420723557472229,
"learning_rate": 3.496882496140861e-06,
"loss": 1.0825788974761963,
"step": 764
},
{
"epoch": 1.6984478935698448,
"grad_norm": 2.4798290729522705,
"learning_rate": 3.4890925752225935e-06,
"loss": 0.9839310050010681,
"step": 766
},
{
"epoch": 1.7028824833702882,
"grad_norm": 1.6060822010040283,
"learning_rate": 3.48129271071079e-06,
"loss": 0.794252336025238,
"step": 768
},
{
"epoch": 1.7073170731707317,
"grad_norm": 1.1123030185699463,
"learning_rate": 3.4734830075431605e-06,
"loss": 1.0317720174789429,
"step": 770
},
{
"epoch": 1.7117516629711753,
"grad_norm": 2.067833423614502,
"learning_rate": 3.4656635707897823e-06,
"loss": 1.0553863048553467,
"step": 772
},
{
"epoch": 1.7161862527716187,
"grad_norm": 0.981274425983429,
"learning_rate": 3.457834505651687e-06,
"loss": 0.9536479115486145,
"step": 774
},
{
"epoch": 1.7206208425720622,
"grad_norm": 0.6783517003059387,
"learning_rate": 3.449995917459442e-06,
"loss": 1.062004566192627,
"step": 776
},
{
"epoch": 1.7250554323725056,
"grad_norm": 0.7117043137550354,
"learning_rate": 3.4421479116717394e-06,
"loss": 1.0369482040405273,
"step": 778
},
{
"epoch": 1.729490022172949,
"grad_norm": 1.9212546348571777,
"learning_rate": 3.4342905938739707e-06,
"loss": 0.6672749519348145,
"step": 780
},
{
"epoch": 1.7339246119733924,
"grad_norm": 0.7695720195770264,
"learning_rate": 3.4264240697768096e-06,
"loss": 1.0097278356552124,
"step": 782
},
{
"epoch": 1.7383592017738358,
"grad_norm": 0.6285500526428223,
"learning_rate": 3.418548445214791e-06,
"loss": 0.7331390380859375,
"step": 784
},
{
"epoch": 1.7427937915742793,
"grad_norm": 0.6695563793182373,
"learning_rate": 3.410663826144884e-06,
"loss": 0.8074593544006348,
"step": 786
},
{
"epoch": 1.7472283813747227,
"grad_norm": 0.7012442946434021,
"learning_rate": 3.4027703186450672e-06,
"loss": 0.7080385684967041,
"step": 788
},
{
"epoch": 1.7516629711751663,
"grad_norm": 2.9280431270599365,
"learning_rate": 3.394868028912906e-06,
"loss": 0.6388112902641296,
"step": 790
},
{
"epoch": 1.7560975609756098,
"grad_norm": 0.9185779690742493,
"learning_rate": 3.386957063264115e-06,
"loss": 0.8246527910232544,
"step": 792
},
{
"epoch": 1.7605321507760532,
"grad_norm": 1.0460119247436523,
"learning_rate": 3.3790375281311355e-06,
"loss": 0.9118836522102356,
"step": 794
},
{
"epoch": 1.7649667405764968,
"grad_norm": 2.923001766204834,
"learning_rate": 3.3711095300617015e-06,
"loss": 0.8226492404937744,
"step": 796
},
{
"epoch": 1.7694013303769403,
"grad_norm": 0.6730989813804626,
"learning_rate": 3.3631731757174048e-06,
"loss": 1.0575485229492188,
"step": 798
},
{
"epoch": 1.7738359201773837,
"grad_norm": 1.040733814239502,
"learning_rate": 3.3552285718722593e-06,
"loss": 1.0470877885818481,
"step": 800
},
{
"epoch": 1.778270509977827,
"grad_norm": 0.9821973443031311,
"learning_rate": 3.3472758254112662e-06,
"loss": 1.0410436391830444,
"step": 802
},
{
"epoch": 1.7827050997782705,
"grad_norm": 2.0623693466186523,
"learning_rate": 3.3393150433289795e-06,
"loss": 1.138871431350708,
"step": 804
},
{
"epoch": 1.787139689578714,
"grad_norm": 0.9976066946983337,
"learning_rate": 3.3313463327280576e-06,
"loss": 0.8022021651268005,
"step": 806
},
{
"epoch": 1.7915742793791574,
"grad_norm": 0.6746619343757629,
"learning_rate": 3.3233698008178306e-06,
"loss": 1.0426361560821533,
"step": 808
},
{
"epoch": 1.7960088691796008,
"grad_norm": 0.7335001230239868,
"learning_rate": 3.3153855549128537e-06,
"loss": 0.6199694275856018,
"step": 810
},
{
"epoch": 1.8004434589800442,
"grad_norm": 3.391047477722168,
"learning_rate": 3.3073937024314647e-06,
"loss": 0.5800549387931824,
"step": 812
},
{
"epoch": 1.8048780487804879,
"grad_norm": 1.8660929203033447,
"learning_rate": 3.2993943508943386e-06,
"loss": 1.197942852973938,
"step": 814
},
{
"epoch": 1.8093126385809313,
"grad_norm": 3.539306640625,
"learning_rate": 3.291387607923041e-06,
"loss": 0.9978748559951782,
"step": 816
},
{
"epoch": 1.8137472283813747,
"grad_norm": 0.7452203035354614,
"learning_rate": 3.283373581238582e-06,
"loss": 0.509084939956665,
"step": 818
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.6028074026107788,
"learning_rate": 3.2753523786599618e-06,
"loss": 1.0873044729232788,
"step": 820
},
{
"epoch": 1.8226164079822618,
"grad_norm": 1.7422271966934204,
"learning_rate": 3.2673241081027263e-06,
"loss": 1.0879627466201782,
"step": 822
},
{
"epoch": 1.8270509977827052,
"grad_norm": 1.1543694734573364,
"learning_rate": 3.259288877577512e-06,
"loss": 1.147214651107788,
"step": 824
},
{
"epoch": 1.8314855875831486,
"grad_norm": 1.4722660779953003,
"learning_rate": 3.251246795188592e-06,
"loss": 0.933826208114624,
"step": 826
},
{
"epoch": 1.835920177383592,
"grad_norm": 1.6987321376800537,
"learning_rate": 3.243197969132425e-06,
"loss": 0.7955626249313354,
"step": 828
},
{
"epoch": 1.8403547671840355,
"grad_norm": 0.9446114897727966,
"learning_rate": 3.2351425076961957e-06,
"loss": 1.023888349533081,
"step": 830
},
{
"epoch": 1.8447893569844789,
"grad_norm": 1.1627906560897827,
"learning_rate": 3.22708051925636e-06,
"loss": 1.0093810558319092,
"step": 832
},
{
"epoch": 1.8492239467849223,
"grad_norm": 0.7773818373680115,
"learning_rate": 3.219012112277189e-06,
"loss": 0.982840895652771,
"step": 834
},
{
"epoch": 1.8536585365853657,
"grad_norm": 0.9189027547836304,
"learning_rate": 3.210937395309304e-06,
"loss": 1.0559179782867432,
"step": 836
},
{
"epoch": 1.8580931263858091,
"grad_norm": 1.0602095127105713,
"learning_rate": 3.202856476988222e-06,
"loss": 1.0600286722183228,
"step": 838
},
{
"epoch": 1.8625277161862528,
"grad_norm": 1.2096672058105469,
"learning_rate": 3.1947694660328914e-06,
"loss": 1.1588785648345947,
"step": 840
},
{
"epoch": 1.8669623059866962,
"grad_norm": 1.3825271129608154,
"learning_rate": 3.1866764712442273e-06,
"loss": 0.6686919927597046,
"step": 842
},
{
"epoch": 1.8713968957871396,
"grad_norm": 1.1404961347579956,
"learning_rate": 3.1785776015036533e-06,
"loss": 0.7725078463554382,
"step": 844
},
{
"epoch": 1.8758314855875833,
"grad_norm": 2.0521693229675293,
"learning_rate": 3.1704729657716314e-06,
"loss": 0.8420175313949585,
"step": 846
},
{
"epoch": 1.8802660753880267,
"grad_norm": 1.6564451456069946,
"learning_rate": 3.1623626730861996e-06,
"loss": 0.9892827272415161,
"step": 848
},
{
"epoch": 1.8847006651884701,
"grad_norm": 1.1145013570785522,
"learning_rate": 3.1542468325615e-06,
"loss": 1.3681321144104004,
"step": 850
},
{
"epoch": 1.8891352549889135,
"grad_norm": 0.7389641404151917,
"learning_rate": 3.1461255533863183e-06,
"loss": 0.6859239339828491,
"step": 852
},
{
"epoch": 1.893569844789357,
"grad_norm": 0.873809814453125,
"learning_rate": 3.1379989448226077e-06,
"loss": 1.0703860521316528,
"step": 854
},
{
"epoch": 1.8980044345898004,
"grad_norm": 0.7670641541481018,
"learning_rate": 3.1298671162040236e-06,
"loss": 0.9328793883323669,
"step": 856
},
{
"epoch": 1.9024390243902438,
"grad_norm": 0.7553779482841492,
"learning_rate": 3.1217301769344488e-06,
"loss": 0.7479826211929321,
"step": 858
},
{
"epoch": 1.9068736141906872,
"grad_norm": 2.153320789337158,
"learning_rate": 3.1135882364865262e-06,
"loss": 0.7695887088775635,
"step": 860
},
{
"epoch": 1.9113082039911307,
"grad_norm": 11.15374755859375,
"learning_rate": 3.105441404400183e-06,
"loss": 1.089734435081482,
"step": 862
},
{
"epoch": 1.9157427937915743,
"grad_norm": 0.8938463926315308,
"learning_rate": 3.097289790281155e-06,
"loss": 0.805739164352417,
"step": 864
},
{
"epoch": 1.9201773835920177,
"grad_norm": 1.704505205154419,
"learning_rate": 3.089133503799517e-06,
"loss": 0.6438337564468384,
"step": 866
},
{
"epoch": 1.9246119733924612,
"grad_norm": 1.9099633693695068,
"learning_rate": 3.0809726546882045e-06,
"loss": 1.136403203010559,
"step": 868
},
{
"epoch": 1.9290465631929048,
"grad_norm": 0.8499243855476379,
"learning_rate": 3.0728073527415376e-06,
"loss": 1.0269296169281006,
"step": 870
},
{
"epoch": 1.9334811529933482,
"grad_norm": 0.5403093695640564,
"learning_rate": 3.0646377078137424e-06,
"loss": 0.6854551434516907,
"step": 872
},
{
"epoch": 1.9379157427937916,
"grad_norm": 1.254056692123413,
"learning_rate": 3.056463829817475e-06,
"loss": 0.6160570383071899,
"step": 874
},
{
"epoch": 1.942350332594235,
"grad_norm": 1.9854458570480347,
"learning_rate": 3.048285828722345e-06,
"loss": 0.6414442658424377,
"step": 876
},
{
"epoch": 1.9467849223946785,
"grad_norm": 2.5461316108703613,
"learning_rate": 3.0401038145534297e-06,
"loss": 0.7798886299133301,
"step": 878
},
{
"epoch": 1.951219512195122,
"grad_norm": 1.066082239151001,
"learning_rate": 3.031917897389799e-06,
"loss": 0.8321408629417419,
"step": 880
},
{
"epoch": 1.9556541019955653,
"grad_norm": 1.9094276428222656,
"learning_rate": 3.0237281873630335e-06,
"loss": 0.637435257434845,
"step": 882
},
{
"epoch": 1.9600886917960088,
"grad_norm": 0.6589820384979248,
"learning_rate": 3.0155347946557407e-06,
"loss": 0.9004865884780884,
"step": 884
},
{
"epoch": 1.9645232815964522,
"grad_norm": 0.6076062321662903,
"learning_rate": 3.007337829500075e-06,
"loss": 1.0442458391189575,
"step": 886
},
{
"epoch": 1.9689578713968958,
"grad_norm": 0.7619770169258118,
"learning_rate": 2.999137402176255e-06,
"loss": 1.0404667854309082,
"step": 888
},
{
"epoch": 1.9733924611973392,
"grad_norm": 0.72157222032547,
"learning_rate": 2.9909336230110747e-06,
"loss": 1.0282480716705322,
"step": 890
},
{
"epoch": 1.9778270509977827,
"grad_norm": 0.7795007824897766,
"learning_rate": 2.9827266023764274e-06,
"loss": 1.1448575258255005,
"step": 892
},
{
"epoch": 1.9822616407982263,
"grad_norm": 1.1579248905181885,
"learning_rate": 2.9745164506878134e-06,
"loss": 1.0330946445465088,
"step": 894
},
{
"epoch": 1.9866962305986697,
"grad_norm": 1.960567593574524,
"learning_rate": 2.9663032784028596e-06,
"loss": 0.8949038982391357,
"step": 896
},
{
"epoch": 1.9911308203991132,
"grad_norm": 1.6049984693527222,
"learning_rate": 2.9580871960198297e-06,
"loss": 0.9902207255363464,
"step": 898
},
{
"epoch": 1.9955654101995566,
"grad_norm": 0.6963536143302917,
"learning_rate": 2.949868314076142e-06,
"loss": 0.8349514007568359,
"step": 900
},
{
"epoch": 2.0,
"grad_norm": 2.399693250656128,
"learning_rate": 2.941646743146875e-06,
"loss": 1.027801513671875,
"step": 902
},
{
"epoch": 2.0044345898004434,
"grad_norm": 0.7714170813560486,
"learning_rate": 2.9334225938432868e-06,
"loss": 0.8111017942428589,
"step": 904
},
{
"epoch": 2.008869179600887,
"grad_norm": 0.9238205552101135,
"learning_rate": 2.925195976811326e-06,
"loss": 0.8706294298171997,
"step": 906
},
{
"epoch": 2.0133037694013303,
"grad_norm": 0.9386454224586487,
"learning_rate": 2.9169670027301387e-06,
"loss": 1.0718032121658325,
"step": 908
},
{
"epoch": 2.0177383592017737,
"grad_norm": 0.6776597499847412,
"learning_rate": 2.9087357823105843e-06,
"loss": 0.8469350337982178,
"step": 910
},
{
"epoch": 2.022172949002217,
"grad_norm": 0.7392479181289673,
"learning_rate": 2.9005024262937427e-06,
"loss": 0.565307080745697,
"step": 912
},
{
"epoch": 2.0266075388026605,
"grad_norm": 1.9702166318893433,
"learning_rate": 2.8922670454494247e-06,
"loss": 0.6154354810714722,
"step": 914
},
{
"epoch": 2.0310421286031044,
"grad_norm": 0.9764010310173035,
"learning_rate": 2.8840297505746843e-06,
"loss": 0.6004781126976013,
"step": 916
},
{
"epoch": 2.035476718403548,
"grad_norm": 0.8005667924880981,
"learning_rate": 2.8757906524923286e-06,
"loss": 0.8735350370407104,
"step": 918
},
{
"epoch": 2.0399113082039912,
"grad_norm": 0.9138982892036438,
"learning_rate": 2.867549862049419e-06,
"loss": 0.6707923412322998,
"step": 920
},
{
"epoch": 2.0443458980044347,
"grad_norm": 0.8569697737693787,
"learning_rate": 2.859307490115791e-06,
"loss": 0.7545152306556702,
"step": 922
},
{
"epoch": 2.048780487804878,
"grad_norm": 0.619215190410614,
"learning_rate": 2.8510636475825533e-06,
"loss": 0.30162450671195984,
"step": 924
},
{
"epoch": 2.0532150776053215,
"grad_norm": 1.0538161993026733,
"learning_rate": 2.8428184453606027e-06,
"loss": 0.8686986565589905,
"step": 926
},
{
"epoch": 2.057649667405765,
"grad_norm": 1.8079990148544312,
"learning_rate": 2.8345719943791266e-06,
"loss": 0.6156920790672302,
"step": 928
},
{
"epoch": 2.0620842572062084,
"grad_norm": 0.4749872088432312,
"learning_rate": 2.826324405584114e-06,
"loss": 0.5743507742881775,
"step": 930
},
{
"epoch": 2.066518847006652,
"grad_norm": 2.578096866607666,
"learning_rate": 2.818075789936863e-06,
"loss": 0.7628493905067444,
"step": 932
},
{
"epoch": 2.070953436807095,
"grad_norm": 1.2346030473709106,
"learning_rate": 2.8098262584124834e-06,
"loss": 0.8032713532447815,
"step": 934
},
{
"epoch": 2.0753880266075386,
"grad_norm": 0.9657310843467712,
"learning_rate": 2.801575921998411e-06,
"loss": 0.9190863966941833,
"step": 936
},
{
"epoch": 2.079822616407982,
"grad_norm": 0.8309587836265564,
"learning_rate": 2.7933248916929066e-06,
"loss": 0.5717717409133911,
"step": 938
},
{
"epoch": 2.084257206208426,
"grad_norm": 0.7853623628616333,
"learning_rate": 2.7850732785035705e-06,
"loss": 0.6060709357261658,
"step": 940
},
{
"epoch": 2.0886917960088693,
"grad_norm": 0.9614481329917908,
"learning_rate": 2.7768211934458417e-06,
"loss": 0.817306399345398,
"step": 942
},
{
"epoch": 2.0931263858093128,
"grad_norm": 0.9111508727073669,
"learning_rate": 2.768568747541509e-06,
"loss": 0.5221774578094482,
"step": 944
},
{
"epoch": 2.097560975609756,
"grad_norm": 0.8565526008605957,
"learning_rate": 2.7603160518172152e-06,
"loss": 0.8890185356140137,
"step": 946
},
{
"epoch": 2.1019955654101996,
"grad_norm": 1.2437005043029785,
"learning_rate": 2.752063217302966e-06,
"loss": 0.9257514476776123,
"step": 948
},
{
"epoch": 2.106430155210643,
"grad_norm": 1.692129373550415,
"learning_rate": 2.743810355030631e-06,
"loss": 0.6258480548858643,
"step": 950
},
{
"epoch": 2.1108647450110865,
"grad_norm": 2.0805153846740723,
"learning_rate": 2.735557576032458e-06,
"loss": 0.7526692152023315,
"step": 952
},
{
"epoch": 2.11529933481153,
"grad_norm": 2.8665034770965576,
"learning_rate": 2.727304991339569e-06,
"loss": 0.6801300644874573,
"step": 954
},
{
"epoch": 2.1197339246119733,
"grad_norm": 0.8256810307502747,
"learning_rate": 2.7190527119804762e-06,
"loss": 0.8693723678588867,
"step": 956
},
{
"epoch": 2.1241685144124167,
"grad_norm": 0.7408389449119568,
"learning_rate": 2.710800848979582e-06,
"loss": 0.7428317070007324,
"step": 958
},
{
"epoch": 2.12860310421286,
"grad_norm": 1.6947144269943237,
"learning_rate": 2.702549513355687e-06,
"loss": 0.688827395439148,
"step": 960
},
{
"epoch": 2.1330376940133036,
"grad_norm": 3.077284574508667,
"learning_rate": 2.694298816120497e-06,
"loss": 0.4587607979774475,
"step": 962
},
{
"epoch": 2.1374722838137474,
"grad_norm": 2.8907575607299805,
"learning_rate": 2.6860488682771306e-06,
"loss": 0.9636158347129822,
"step": 964
},
{
"epoch": 2.141906873614191,
"grad_norm": 1.936044454574585,
"learning_rate": 2.67779978081862e-06,
"loss": 0.7787302136421204,
"step": 966
},
{
"epoch": 2.1463414634146343,
"grad_norm": 0.9226675629615784,
"learning_rate": 2.669551664726428e-06,
"loss": 0.5220115780830383,
"step": 968
},
{
"epoch": 2.1507760532150777,
"grad_norm": 0.9738346338272095,
"learning_rate": 2.6613046309689433e-06,
"loss": 0.6782161593437195,
"step": 970
},
{
"epoch": 2.155210643015521,
"grad_norm": 0.9448055028915405,
"learning_rate": 2.6530587904999966e-06,
"loss": 0.6957781314849854,
"step": 972
},
{
"epoch": 2.1596452328159645,
"grad_norm": 1.6226963996887207,
"learning_rate": 2.6448142542573624e-06,
"loss": 0.8800594806671143,
"step": 974
},
{
"epoch": 2.164079822616408,
"grad_norm": 1.0587058067321777,
"learning_rate": 2.6365711331612692e-06,
"loss": 0.7791632413864136,
"step": 976
},
{
"epoch": 2.1685144124168514,
"grad_norm": 3.0447235107421875,
"learning_rate": 2.6283295381129066e-06,
"loss": 0.5873762965202332,
"step": 978
},
{
"epoch": 2.172949002217295,
"grad_norm": 1.2626818418502808,
"learning_rate": 2.620089579992933e-06,
"loss": 0.8509657979011536,
"step": 980
},
{
"epoch": 2.1773835920177382,
"grad_norm": 0.7221801280975342,
"learning_rate": 2.6118513696599823e-06,
"loss": 0.4064200818538666,
"step": 982
},
{
"epoch": 2.1818181818181817,
"grad_norm": 1.155705451965332,
"learning_rate": 2.603615017949178e-06,
"loss": 0.2613215446472168,
"step": 984
},
{
"epoch": 2.186252771618625,
"grad_norm": 3.4410154819488525,
"learning_rate": 2.595380635670634e-06,
"loss": 0.6540266275405884,
"step": 986
},
{
"epoch": 2.1906873614190685,
"grad_norm": 12.9176607131958,
"learning_rate": 2.5871483336079694e-06,
"loss": 0.528972864151001,
"step": 988
},
{
"epoch": 2.1951219512195124,
"grad_norm": 1.3602675199508667,
"learning_rate": 2.578918222516818e-06,
"loss": 0.875016987323761,
"step": 990
},
{
"epoch": 2.199556541019956,
"grad_norm": 1.8910794258117676,
"learning_rate": 2.5706904131233336e-06,
"loss": 0.9778388738632202,
"step": 992
},
{
"epoch": 2.203991130820399,
"grad_norm": 1.9940085411071777,
"learning_rate": 2.5624650161227073e-06,
"loss": 0.7324192523956299,
"step": 994
},
{
"epoch": 2.2084257206208426,
"grad_norm": 0.7409563660621643,
"learning_rate": 2.5542421421776696e-06,
"loss": 0.91463702917099,
"step": 996
},
{
"epoch": 2.212860310421286,
"grad_norm": 1.0401170253753662,
"learning_rate": 2.5460219019170097e-06,
"loss": 0.8429087400436401,
"step": 998
},
{
"epoch": 2.2172949002217295,
"grad_norm": 0.8627730011940002,
"learning_rate": 2.5378044059340845e-06,
"loss": 0.6182942986488342,
"step": 1000
},
{
"epoch": 2.221729490022173,
"grad_norm": 1.778159737586975,
"learning_rate": 2.5295897647853283e-06,
"loss": 0.41667890548706055,
"step": 1002
},
{
"epoch": 2.2261640798226163,
"grad_norm": 0.6985337734222412,
"learning_rate": 2.521378088988767e-06,
"loss": 0.9056693911552429,
"step": 1004
},
{
"epoch": 2.2305986696230597,
"grad_norm": 1.2205218076705933,
"learning_rate": 2.513169489022531e-06,
"loss": 0.7475935816764832,
"step": 1006
},
{
"epoch": 2.235033259423503,
"grad_norm": 0.8802860379219055,
"learning_rate": 2.5049640753233705e-06,
"loss": 0.9124072194099426,
"step": 1008
},
{
"epoch": 2.2394678492239466,
"grad_norm": 1.432633876800537,
"learning_rate": 2.496761958285167e-06,
"loss": 0.5237014293670654,
"step": 1010
},
{
"epoch": 2.2439024390243905,
"grad_norm": 0.8981291055679321,
"learning_rate": 2.488563248257451e-06,
"loss": 0.8662800192832947,
"step": 1012
},
{
"epoch": 2.248337028824834,
"grad_norm": 3.874892234802246,
"learning_rate": 2.4803680555439136e-06,
"loss": 0.7132869362831116,
"step": 1014
},
{
"epoch": 2.2527716186252773,
"grad_norm": 0.9583449959754944,
"learning_rate": 2.4721764904009272e-06,
"loss": 0.9416664838790894,
"step": 1016
},
{
"epoch": 2.2572062084257207,
"grad_norm": 1.920847773551941,
"learning_rate": 2.4639886630360574e-06,
"loss": 0.522179901599884,
"step": 1018
},
{
"epoch": 2.261640798226164,
"grad_norm": 0.8928440809249878,
"learning_rate": 2.455804683606584e-06,
"loss": 0.8632444739341736,
"step": 1020
},
{
"epoch": 2.2660753880266076,
"grad_norm": 0.40272587537765503,
"learning_rate": 2.4476246622180174e-06,
"loss": 0.4794633090496063,
"step": 1022
},
{
"epoch": 2.270509977827051,
"grad_norm": 1.993319034576416,
"learning_rate": 2.4394487089226158e-06,
"loss": 1.1585773229599,
"step": 1024
},
{
"epoch": 2.2749445676274944,
"grad_norm": 0.8279446363449097,
"learning_rate": 2.43127693371791e-06,
"loss": 0.6182215809822083,
"step": 1026
},
{
"epoch": 2.279379157427938,
"grad_norm": 0.7182877659797668,
"learning_rate": 2.423109446545213e-06,
"loss": 0.8768652677536011,
"step": 1028
},
{
"epoch": 2.2838137472283813,
"grad_norm": 1.7738810777664185,
"learning_rate": 2.4149463572881537e-06,
"loss": 0.9176031351089478,
"step": 1030
},
{
"epoch": 2.2882483370288247,
"grad_norm": 0.8209647536277771,
"learning_rate": 2.4067877757711907e-06,
"loss": 0.6851370930671692,
"step": 1032
},
{
"epoch": 2.292682926829268,
"grad_norm": 1.1586294174194336,
"learning_rate": 2.3986338117581357e-06,
"loss": 0.5924996137619019,
"step": 1034
},
{
"epoch": 2.2971175166297115,
"grad_norm": 1.9774837493896484,
"learning_rate": 2.390484574950677e-06,
"loss": 0.862289309501648,
"step": 1036
},
{
"epoch": 2.3015521064301554,
"grad_norm": 1.9414352178573608,
"learning_rate": 2.382340174986906e-06,
"loss": 0.7861872911453247,
"step": 1038
},
{
"epoch": 2.305986696230599,
"grad_norm": 2.3420515060424805,
"learning_rate": 2.374200721439837e-06,
"loss": 0.8478289246559143,
"step": 1040
},
{
"epoch": 2.3104212860310422,
"grad_norm": 1.3481749296188354,
"learning_rate": 2.3660663238159405e-06,
"loss": 0.901607096195221,
"step": 1042
},
{
"epoch": 2.3148558758314857,
"grad_norm": 0.6049371957778931,
"learning_rate": 2.357937091553662e-06,
"loss": 0.7081278562545776,
"step": 1044
},
{
"epoch": 2.319290465631929,
"grad_norm": 1.6102485656738281,
"learning_rate": 2.3498131340219554e-06,
"loss": 0.8813071846961975,
"step": 1046
},
{
"epoch": 2.3237250554323725,
"grad_norm": 0.9218488931655884,
"learning_rate": 2.341694560518809e-06,
"loss": 0.8351644277572632,
"step": 1048
},
{
"epoch": 2.328159645232816,
"grad_norm": 1.1544103622436523,
"learning_rate": 2.333581480269776e-06,
"loss": 0.9058061838150024,
"step": 1050
},
{
"epoch": 2.3325942350332594,
"grad_norm": 1.371621012687683,
"learning_rate": 2.325474002426503e-06,
"loss": 0.7822686433792114,
"step": 1052
},
{
"epoch": 2.337028824833703,
"grad_norm": 0.9093418717384338,
"learning_rate": 2.3173722360652644e-06,
"loss": 0.6994078755378723,
"step": 1054
},
{
"epoch": 2.341463414634146,
"grad_norm": 0.731285035610199,
"learning_rate": 2.309276290185494e-06,
"loss": 0.759069561958313,
"step": 1056
},
{
"epoch": 2.3458980044345896,
"grad_norm": 0.32100459933280945,
"learning_rate": 2.3011862737083162e-06,
"loss": 0.5203639268875122,
"step": 1058
},
{
"epoch": 2.3503325942350335,
"grad_norm": 1.7071316242218018,
"learning_rate": 2.2931022954750843e-06,
"loss": 0.9963219165802002,
"step": 1060
},
{
"epoch": 2.354767184035477,
"grad_norm": 5.787060737609863,
"learning_rate": 2.285024464245912e-06,
"loss": 0.4076020419597626,
"step": 1062
},
{
"epoch": 2.3592017738359203,
"grad_norm": 0.3795124590396881,
"learning_rate": 2.2769528886982158e-06,
"loss": 0.4680769443511963,
"step": 1064
},
{
"epoch": 2.3636363636363638,
"grad_norm": 4.307487964630127,
"learning_rate": 2.268887677425248e-06,
"loss": 0.7492026090621948,
"step": 1066
},
{
"epoch": 2.368070953436807,
"grad_norm": 0.6910419464111328,
"learning_rate": 2.2608289389346362e-06,
"loss": 0.9367992877960205,
"step": 1068
},
{
"epoch": 2.3725055432372506,
"grad_norm": 0.7431702613830566,
"learning_rate": 2.2527767816469263e-06,
"loss": 0.8184199929237366,
"step": 1070
},
{
"epoch": 2.376940133037694,
"grad_norm": 1.009751558303833,
"learning_rate": 2.244731313894121e-06,
"loss": 0.787276566028595,
"step": 1072
},
{
"epoch": 2.3813747228381374,
"grad_norm": 0.4790377914905548,
"learning_rate": 2.236692643918224e-06,
"loss": 0.024766096845269203,
"step": 1074
},
{
"epoch": 2.385809312638581,
"grad_norm": 1.139642357826233,
"learning_rate": 2.2286608798697834e-06,
"loss": 0.5393450856208801,
"step": 1076
},
{
"epoch": 2.3902439024390243,
"grad_norm": 1.150428295135498,
"learning_rate": 2.2206361298064343e-06,
"loss": 0.373291552066803,
"step": 1078
},
{
"epoch": 2.3946784922394677,
"grad_norm": 1.323978066444397,
"learning_rate": 2.2126185016914515e-06,
"loss": 0.6814523935317993,
"step": 1080
},
{
"epoch": 2.399113082039911,
"grad_norm": 0.38711783289909363,
"learning_rate": 2.2046081033922884e-06,
"loss": 0.5636866092681885,
"step": 1082
},
{
"epoch": 2.4035476718403546,
"grad_norm": 2.8012638092041016,
"learning_rate": 2.1966050426791325e-06,
"loss": 0.6869620084762573,
"step": 1084
},
{
"epoch": 2.4079822616407984,
"grad_norm": 0.8223492503166199,
"learning_rate": 2.1886094272234508e-06,
"loss": 0.7766349911689758,
"step": 1086
},
{
"epoch": 2.412416851441242,
"grad_norm": 0.4253004193305969,
"learning_rate": 2.1806213645965457e-06,
"loss": 0.3063335716724396,
"step": 1088
},
{
"epoch": 2.4168514412416853,
"grad_norm": 2.2866053581237793,
"learning_rate": 2.172640962268104e-06,
"loss": 0.8616948127746582,
"step": 1090
},
{
"epoch": 2.4212860310421287,
"grad_norm": 6.167764186859131,
"learning_rate": 2.1646683276047525e-06,
"loss": 1.083250641822815,
"step": 1092
},
{
"epoch": 2.425720620842572,
"grad_norm": 2.147050142288208,
"learning_rate": 2.156703567868615e-06,
"loss": 0.40848734974861145,
"step": 1094
},
{
"epoch": 2.4301552106430155,
"grad_norm": 2.4124085903167725,
"learning_rate": 2.148746790215866e-06,
"loss": 0.5506104826927185,
"step": 1096
},
{
"epoch": 2.434589800443459,
"grad_norm": 0.7438501119613647,
"learning_rate": 2.140798101695291e-06,
"loss": 0.4622332453727722,
"step": 1098
},
{
"epoch": 2.4390243902439024,
"grad_norm": 1.8033671379089355,
"learning_rate": 2.1328576092468476e-06,
"loss": 0.9211458563804626,
"step": 1100
},
{
"epoch": 2.443458980044346,
"grad_norm": 0.6345164775848389,
"learning_rate": 2.124925419700223e-06,
"loss": 0.9258796572685242,
"step": 1102
},
{
"epoch": 2.4478935698447892,
"grad_norm": 1.8896942138671875,
"learning_rate": 2.1170016397734e-06,
"loss": 0.4680280089378357,
"step": 1104
},
{
"epoch": 2.4523281596452327,
"grad_norm": 0.8533465266227722,
"learning_rate": 2.109086376071221e-06,
"loss": 0.830375611782074,
"step": 1106
},
{
"epoch": 2.4567627494456765,
"grad_norm": 2.495920181274414,
"learning_rate": 2.1011797350839513e-06,
"loss": 0.7596384286880493,
"step": 1108
},
{
"epoch": 2.4611973392461195,
"grad_norm": 0.9585152864456177,
"learning_rate": 2.093281823185848e-06,
"loss": 0.8787661790847778,
"step": 1110
},
{
"epoch": 2.4656319290465634,
"grad_norm": 0.6246011257171631,
"learning_rate": 2.0853927466337315e-06,
"loss": 0.578016459941864,
"step": 1112
},
{
"epoch": 2.470066518847007,
"grad_norm": 0.9855407476425171,
"learning_rate": 2.077512611565551e-06,
"loss": 0.8436506986618042,
"step": 1114
},
{
"epoch": 2.47450110864745,
"grad_norm": 2.1086971759796143,
"learning_rate": 2.0696415239989593e-06,
"loss": 0.35142093896865845,
"step": 1116
},
{
"epoch": 2.4789356984478936,
"grad_norm": 0.8225948810577393,
"learning_rate": 2.0617795898298855e-06,
"loss": 0.8377672433853149,
"step": 1118
},
{
"epoch": 2.483370288248337,
"grad_norm": 1.1087491512298584,
"learning_rate": 2.053926914831112e-06,
"loss": 0.7869824767112732,
"step": 1120
},
{
"epoch": 2.4878048780487805,
"grad_norm": 1.0127193927764893,
"learning_rate": 2.04608360465085e-06,
"loss": 0.8879801034927368,
"step": 1122
},
{
"epoch": 2.492239467849224,
"grad_norm": 6.3018717765808105,
"learning_rate": 2.038249764811318e-06,
"loss": 1.030903935432434,
"step": 1124
},
{
"epoch": 2.4966740576496673,
"grad_norm": 0.7648253440856934,
"learning_rate": 2.0304255007073227e-06,
"loss": 0.9654337763786316,
"step": 1126
},
{
"epoch": 2.5011086474501107,
"grad_norm": 1.2895153760910034,
"learning_rate": 2.022610917604842e-06,
"loss": 0.6278480291366577,
"step": 1128
},
{
"epoch": 2.505543237250554,
"grad_norm": 2.6871118545532227,
"learning_rate": 2.014806120639605e-06,
"loss": 0.6305501461029053,
"step": 1130
},
{
"epoch": 2.5099778270509976,
"grad_norm": 11.048127174377441,
"learning_rate": 2.007011214815684e-06,
"loss": 0.7386720776557922,
"step": 1132
},
{
"epoch": 2.5144124168514415,
"grad_norm": 1.0972473621368408,
"learning_rate": 1.9992263050040737e-06,
"loss": 0.5156517028808594,
"step": 1134
},
{
"epoch": 2.5188470066518844,
"grad_norm": 0.9979491829872131,
"learning_rate": 1.991451495941289e-06,
"loss": 0.908918023109436,
"step": 1136
},
{
"epoch": 2.5232815964523283,
"grad_norm": 0.9635651707649231,
"learning_rate": 1.983686892227948e-06,
"loss": 0.6651601791381836,
"step": 1138
},
{
"epoch": 2.5277161862527717,
"grad_norm": 1.025274395942688,
"learning_rate": 1.975932598327369e-06,
"loss": 0.8417225480079651,
"step": 1140
},
{
"epoch": 2.532150776053215,
"grad_norm": 10.28856086730957,
"learning_rate": 1.9681887185641646e-06,
"loss": 0.4462983310222626,
"step": 1142
},
{
"epoch": 2.5365853658536586,
"grad_norm": 0.5850323438644409,
"learning_rate": 1.9604553571228395e-06,
"loss": 0.5580345392227173,
"step": 1144
},
{
"epoch": 2.541019955654102,
"grad_norm": 1.3211580514907837,
"learning_rate": 1.9527326180463855e-06,
"loss": 0.9013155102729797,
"step": 1146
},
{
"epoch": 2.5454545454545454,
"grad_norm": 0.7220462560653687,
"learning_rate": 1.9450206052348823e-06,
"loss": 0.8708188533782959,
"step": 1148
},
{
"epoch": 2.549889135254989,
"grad_norm": 0.9700856804847717,
"learning_rate": 1.9373194224441028e-06,
"loss": 0.8098379969596863,
"step": 1150
},
{
"epoch": 2.5543237250554323,
"grad_norm": 1.2969111204147339,
"learning_rate": 1.929629173284114e-06,
"loss": 0.5333951711654663,
"step": 1152
},
{
"epoch": 2.5587583148558757,
"grad_norm": 0.45577237010002136,
"learning_rate": 1.9219499612178836e-06,
"loss": 0.3139987289905548,
"step": 1154
},
{
"epoch": 2.5631929046563195,
"grad_norm": 2.8768374919891357,
"learning_rate": 1.9142818895598908e-06,
"loss": 0.5258563756942749,
"step": 1156
},
{
"epoch": 2.5676274944567625,
"grad_norm": 2.359679937362671,
"learning_rate": 1.9066250614747317e-06,
"loss": 0.6680082082748413,
"step": 1158
},
{
"epoch": 2.5720620842572064,
"grad_norm": 1.424459457397461,
"learning_rate": 1.8989795799757348e-06,
"loss": 0.7696115970611572,
"step": 1160
},
{
"epoch": 2.57649667405765,
"grad_norm": 1.1945898532867432,
"learning_rate": 1.8913455479235754e-06,
"loss": 0.86902916431427,
"step": 1162
},
{
"epoch": 2.5809312638580932,
"grad_norm": 0.942388653755188,
"learning_rate": 1.8837230680248874e-06,
"loss": 0.8644537925720215,
"step": 1164
},
{
"epoch": 2.5853658536585367,
"grad_norm": 0.9040746092796326,
"learning_rate": 1.8761122428308875e-06,
"loss": 0.5937469601631165,
"step": 1166
},
{
"epoch": 2.58980044345898,
"grad_norm": 1.1163363456726074,
"learning_rate": 1.8685131747359902e-06,
"loss": 0.8463367819786072,
"step": 1168
},
{
"epoch": 2.5942350332594235,
"grad_norm": 1.2768183946609497,
"learning_rate": 1.8609259659764345e-06,
"loss": 0.8608056306838989,
"step": 1170
},
{
"epoch": 2.598669623059867,
"grad_norm": 1.604803204536438,
"learning_rate": 1.853350718628904e-06,
"loss": 0.3180133104324341,
"step": 1172
},
{
"epoch": 2.6031042128603104,
"grad_norm": 0.94985032081604,
"learning_rate": 1.845787534609157e-06,
"loss": 0.5958639979362488,
"step": 1174
},
{
"epoch": 2.6075388026607538,
"grad_norm": 0.8320684432983398,
"learning_rate": 1.8382365156706566e-06,
"loss": 0.6725127696990967,
"step": 1176
},
{
"epoch": 2.611973392461197,
"grad_norm": 1.2190077304840088,
"learning_rate": 1.8306977634031976e-06,
"loss": 0.6454840302467346,
"step": 1178
},
{
"epoch": 2.6164079822616406,
"grad_norm": 3.530073642730713,
"learning_rate": 1.8231713792315403e-06,
"loss": 0.6968674659729004,
"step": 1180
},
{
"epoch": 2.6208425720620845,
"grad_norm": 0.7371127605438232,
"learning_rate": 1.8156574644140495e-06,
"loss": 0.7820730805397034,
"step": 1182
},
{
"epoch": 2.6252771618625275,
"grad_norm": 0.9225616455078125,
"learning_rate": 1.8081561200413295e-06,
"loss": 0.8885502815246582,
"step": 1184
},
{
"epoch": 2.6297117516629713,
"grad_norm": 1.0041942596435547,
"learning_rate": 1.800667447034864e-06,
"loss": 0.863980770111084,
"step": 1186
},
{
"epoch": 2.6341463414634148,
"grad_norm": 2.831209421157837,
"learning_rate": 1.7931915461456573e-06,
"loss": 0.936083197593689,
"step": 1188
},
{
"epoch": 2.638580931263858,
"grad_norm": 0.8209375143051147,
"learning_rate": 1.7857285179528838e-06,
"loss": 0.5256603360176086,
"step": 1190
},
{
"epoch": 2.6430155210643016,
"grad_norm": 8.401679992675781,
"learning_rate": 1.7782784628625305e-06,
"loss": 0.807640552520752,
"step": 1192
},
{
"epoch": 2.647450110864745,
"grad_norm": 1.9250189065933228,
"learning_rate": 1.7708414811060437e-06,
"loss": 0.5451607704162598,
"step": 1194
},
{
"epoch": 2.6518847006651884,
"grad_norm": 1.3286583423614502,
"learning_rate": 1.763417672738989e-06,
"loss": 0.7666381001472473,
"step": 1196
},
{
"epoch": 2.656319290465632,
"grad_norm": 0.7077732086181641,
"learning_rate": 1.7560071376396953e-06,
"loss": 0.258193701505661,
"step": 1198
},
{
"epoch": 2.6607538802660753,
"grad_norm": 0.7076205611228943,
"learning_rate": 1.7486099755079197e-06,
"loss": 0.9763070344924927,
"step": 1200
},
{
"epoch": 2.6651884700665187,
"grad_norm": 0.667148232460022,
"learning_rate": 1.7412262858634987e-06,
"loss": 0.9205498099327087,
"step": 1202
},
{
"epoch": 2.6696230598669626,
"grad_norm": 1.1608986854553223,
"learning_rate": 1.7338561680450171e-06,
"loss": 0.4887681007385254,
"step": 1204
},
{
"epoch": 2.6740576496674056,
"grad_norm": 1.135400414466858,
"learning_rate": 1.7264997212084616e-06,
"loss": 0.7635675668716431,
"step": 1206
},
{
"epoch": 2.6784922394678494,
"grad_norm": 1.1827253103256226,
"learning_rate": 1.7191570443258976e-06,
"loss": 0.9846773147583008,
"step": 1208
},
{
"epoch": 2.682926829268293,
"grad_norm": 1.253548502922058,
"learning_rate": 1.711828236184131e-06,
"loss": 0.506197988986969,
"step": 1210
},
{
"epoch": 2.6873614190687363,
"grad_norm": 2.297689199447632,
"learning_rate": 1.704513395383378e-06,
"loss": 0.3404999375343323,
"step": 1212
},
{
"epoch": 2.6917960088691797,
"grad_norm": 0.9552398920059204,
"learning_rate": 1.6972126203359454e-06,
"loss": 0.5362938046455383,
"step": 1214
},
{
"epoch": 2.696230598669623,
"grad_norm": 4.347363471984863,
"learning_rate": 1.6899260092648995e-06,
"loss": 0.6291901469230652,
"step": 1216
},
{
"epoch": 2.7006651884700665,
"grad_norm": 2.0356476306915283,
"learning_rate": 1.6826536602027471e-06,
"loss": 0.6693211197853088,
"step": 1218
},
{
"epoch": 2.70509977827051,
"grad_norm": 0.9919900894165039,
"learning_rate": 1.6753956709901202e-06,
"loss": 0.9943591952323914,
"step": 1220
},
{
"epoch": 2.7095343680709534,
"grad_norm": 1.3586797714233398,
"learning_rate": 1.6681521392744515e-06,
"loss": 0.6912680268287659,
"step": 1222
},
{
"epoch": 2.713968957871397,
"grad_norm": 2.1318626403808594,
"learning_rate": 1.660923162508671e-06,
"loss": 0.8818514347076416,
"step": 1224
},
{
"epoch": 2.7184035476718402,
"grad_norm": 1.702278971672058,
"learning_rate": 1.6537088379498872e-06,
"loss": 0.4949290454387665,
"step": 1226
},
{
"epoch": 2.7228381374722836,
"grad_norm": 1.441737413406372,
"learning_rate": 1.6465092626580787e-06,
"loss": 0.991607666015625,
"step": 1228
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.105600357055664,
"learning_rate": 1.6393245334947942e-06,
"loss": 0.7393255233764648,
"step": 1230
},
{
"epoch": 2.7317073170731705,
"grad_norm": 0.6143295764923096,
"learning_rate": 1.6321547471218432e-06,
"loss": 0.812101423740387,
"step": 1232
},
{
"epoch": 2.7361419068736144,
"grad_norm": 2.9823520183563232,
"learning_rate": 1.6250000000000007e-06,
"loss": 0.41637933254241943,
"step": 1234
},
{
"epoch": 2.740576496674058,
"grad_norm": 0.9311825633049011,
"learning_rate": 1.6178603883877032e-06,
"loss": 0.8702437877655029,
"step": 1236
},
{
"epoch": 2.745011086474501,
"grad_norm": 0.8797629475593567,
"learning_rate": 1.6107360083397604e-06,
"loss": 0.5236175060272217,
"step": 1238
},
{
"epoch": 2.7494456762749446,
"grad_norm": 0.7002538442611694,
"learning_rate": 1.6036269557060594e-06,
"loss": 0.5518063306808472,
"step": 1240
},
{
"epoch": 2.753880266075388,
"grad_norm": 0.7229853272438049,
"learning_rate": 1.5965333261302735e-06,
"loss": 0.9187084436416626,
"step": 1242
},
{
"epoch": 2.7583148558758315,
"grad_norm": 2.7623326778411865,
"learning_rate": 1.5894552150485801e-06,
"loss": 0.8829126358032227,
"step": 1244
},
{
"epoch": 2.762749445676275,
"grad_norm": 1.0560897588729858,
"learning_rate": 1.5823927176883725e-06,
"loss": 0.44306543469429016,
"step": 1246
},
{
"epoch": 2.7671840354767183,
"grad_norm": 2.0120999813079834,
"learning_rate": 1.5753459290669792e-06,
"loss": 0.5320644974708557,
"step": 1248
},
{
"epoch": 2.7716186252771617,
"grad_norm": 1.5977369546890259,
"learning_rate": 1.5683149439903905e-06,
"loss": 0.6207846403121948,
"step": 1250
},
{
"epoch": 2.776053215077605,
"grad_norm": 1.803134560585022,
"learning_rate": 1.5612998570519746e-06,
"loss": 0.9645813703536987,
"step": 1252
},
{
"epoch": 2.7804878048780486,
"grad_norm": 0.9961108565330505,
"learning_rate": 1.5543007626312129e-06,
"loss": 0.9654743671417236,
"step": 1254
},
{
"epoch": 2.7849223946784925,
"grad_norm": 0.9151904582977295,
"learning_rate": 1.5473177548924267e-06,
"loss": 0.928931713104248,
"step": 1256
},
{
"epoch": 2.7893569844789354,
"grad_norm": 0.7890511155128479,
"learning_rate": 1.5403509277835077e-06,
"loss": 0.1760758012533188,
"step": 1258
},
{
"epoch": 2.7937915742793793,
"grad_norm": 0.9414036273956299,
"learning_rate": 1.5334003750346608e-06,
"loss": 0.8468573093414307,
"step": 1260
},
{
"epoch": 2.7982261640798227,
"grad_norm": 3.476710319519043,
"learning_rate": 1.5264661901571349e-06,
"loss": 0.5655899047851562,
"step": 1262
},
{
"epoch": 2.802660753880266,
"grad_norm": 0.9431162476539612,
"learning_rate": 1.5195484664419732e-06,
"loss": 0.4929567277431488,
"step": 1264
},
{
"epoch": 2.8070953436807096,
"grad_norm": 1.4319041967391968,
"learning_rate": 1.5126472969587502e-06,
"loss": 0.8094195127487183,
"step": 1266
},
{
"epoch": 2.811529933481153,
"grad_norm": 3.1319103240966797,
"learning_rate": 1.5057627745543269e-06,
"loss": 0.907342791557312,
"step": 1268
},
{
"epoch": 2.8159645232815964,
"grad_norm": 1.1402379274368286,
"learning_rate": 1.4988949918515947e-06,
"loss": 0.9050721526145935,
"step": 1270
},
{
"epoch": 2.82039911308204,
"grad_norm": 3.1753454208374023,
"learning_rate": 1.4920440412482345e-06,
"loss": 0.4124918282032013,
"step": 1272
},
{
"epoch": 2.8248337028824833,
"grad_norm": 1.2776395082473755,
"learning_rate": 1.485210014915473e-06,
"loss": 0.540270984172821,
"step": 1274
},
{
"epoch": 2.8292682926829267,
"grad_norm": 1.1165990829467773,
"learning_rate": 1.4783930047968388e-06,
"loss": 0.8869239091873169,
"step": 1276
},
{
"epoch": 2.8337028824833705,
"grad_norm": 1.8204717636108398,
"learning_rate": 1.4715931026069273e-06,
"loss": 0.8854659199714661,
"step": 1278
},
{
"epoch": 2.8381374722838135,
"grad_norm": 1.062864899635315,
"learning_rate": 1.4648103998301716e-06,
"loss": 0.5227654576301575,
"step": 1280
},
{
"epoch": 2.8425720620842574,
"grad_norm": 4.4705400466918945,
"learning_rate": 1.4580449877196035e-06,
"loss": 0.5940520763397217,
"step": 1282
},
{
"epoch": 2.847006651884701,
"grad_norm": 1.0326560735702515,
"learning_rate": 1.4512969572956328e-06,
"loss": 0.6471421122550964,
"step": 1284
},
{
"epoch": 2.8514412416851442,
"grad_norm": 0.8020298480987549,
"learning_rate": 1.4445663993448173e-06,
"loss": 0.8675932884216309,
"step": 1286
},
{
"epoch": 2.8558758314855877,
"grad_norm": 2.7269020080566406,
"learning_rate": 1.437853404418646e-06,
"loss": 0.2647731602191925,
"step": 1288
},
{
"epoch": 2.860310421286031,
"grad_norm": 1.6477257013320923,
"learning_rate": 1.431158062832318e-06,
"loss": 0.9172856211662292,
"step": 1290
},
{
"epoch": 2.8647450110864745,
"grad_norm": 1.286494255065918,
"learning_rate": 1.4244804646635266e-06,
"loss": 0.8936296105384827,
"step": 1292
},
{
"epoch": 2.869179600886918,
"grad_norm": 2.2755205631256104,
"learning_rate": 1.4178206997512522e-06,
"loss": 0.8551170229911804,
"step": 1294
},
{
"epoch": 2.8736141906873613,
"grad_norm": 0.80238938331604,
"learning_rate": 1.4111788576945467e-06,
"loss": 0.8603898882865906,
"step": 1296
},
{
"epoch": 2.8780487804878048,
"grad_norm": 1.9288290739059448,
"learning_rate": 1.4045550278513351e-06,
"loss": 0.5612266659736633,
"step": 1298
},
{
"epoch": 2.882483370288248,
"grad_norm": 0.8210675716400146,
"learning_rate": 1.3979492993372074e-06,
"loss": 0.9199106097221375,
"step": 1300
},
{
"epoch": 2.8869179600886916,
"grad_norm": 2.33469557762146,
"learning_rate": 1.391361761024222e-06,
"loss": 0.26451584696769714,
"step": 1302
},
{
"epoch": 2.8913525498891355,
"grad_norm": 0.852741539478302,
"learning_rate": 1.3847925015397146e-06,
"loss": 0.8316776156425476,
"step": 1304
},
{
"epoch": 2.8957871396895785,
"grad_norm": 1.808684229850769,
"learning_rate": 1.3782416092650957e-06,
"loss": 0.7958760261535645,
"step": 1306
},
{
"epoch": 2.9002217294900223,
"grad_norm": 0.3986283838748932,
"learning_rate": 1.3717091723346699e-06,
"loss": 0.35594817996025085,
"step": 1308
},
{
"epoch": 2.9046563192904657,
"grad_norm": 1.0733470916748047,
"learning_rate": 1.3651952786344485e-06,
"loss": 0.4130232036113739,
"step": 1310
},
{
"epoch": 2.909090909090909,
"grad_norm": 2.3315577507019043,
"learning_rate": 1.3587000158009638e-06,
"loss": 0.8895840048789978,
"step": 1312
},
{
"epoch": 2.9135254988913526,
"grad_norm": 0.7047470808029175,
"learning_rate": 1.3522234712200954e-06,
"loss": 0.8686839938163757,
"step": 1314
},
{
"epoch": 2.917960088691796,
"grad_norm": 5.497637748718262,
"learning_rate": 1.3457657320258878e-06,
"loss": 0.8700605034828186,
"step": 1316
},
{
"epoch": 2.9223946784922394,
"grad_norm": 0.7429857850074768,
"learning_rate": 1.3393268850993852e-06,
"loss": 0.8899486064910889,
"step": 1318
},
{
"epoch": 2.926829268292683,
"grad_norm": 0.9991167187690735,
"learning_rate": 1.332907017067458e-06,
"loss": 0.6314383745193481,
"step": 1320
},
{
"epoch": 2.9312638580931263,
"grad_norm": 0.7007908821105957,
"learning_rate": 1.3265062143016378e-06,
"loss": 0.8329391479492188,
"step": 1322
},
{
"epoch": 2.9356984478935697,
"grad_norm": 0.8860281705856323,
"learning_rate": 1.3201245629169574e-06,
"loss": 0.884133517742157,
"step": 1324
},
{
"epoch": 2.9401330376940136,
"grad_norm": 1.6985708475112915,
"learning_rate": 1.3137621487707902e-06,
"loss": 0.8119447827339172,
"step": 1326
},
{
"epoch": 2.9445676274944566,
"grad_norm": 1.1318223476409912,
"learning_rate": 1.307419057461697e-06,
"loss": 0.9324439167976379,
"step": 1328
},
{
"epoch": 2.9490022172949004,
"grad_norm": 1.2057431936264038,
"learning_rate": 1.3010953743282724e-06,
"loss": 0.8887814283370972,
"step": 1330
},
{
"epoch": 2.953436807095344,
"grad_norm": 1.5262060165405273,
"learning_rate": 1.294791184447996e-06,
"loss": 0.9244763255119324,
"step": 1332
},
{
"epoch": 2.9578713968957873,
"grad_norm": 0.9449637532234192,
"learning_rate": 1.2885065726360925e-06,
"loss": 0.43268677592277527,
"step": 1334
},
{
"epoch": 2.9623059866962307,
"grad_norm": 0.9329234957695007,
"learning_rate": 1.282241623444386e-06,
"loss": 0.8470190763473511,
"step": 1336
},
{
"epoch": 2.966740576496674,
"grad_norm": 2.124790668487549,
"learning_rate": 1.2759964211601633e-06,
"loss": 0.9702441096305847,
"step": 1338
},
{
"epoch": 2.9711751662971175,
"grad_norm": 0.800217866897583,
"learning_rate": 1.269771049805042e-06,
"loss": 0.6707828044891357,
"step": 1340
},
{
"epoch": 2.975609756097561,
"grad_norm": 2.3894753456115723,
"learning_rate": 1.2635655931338364e-06,
"loss": 0.5933993458747864,
"step": 1342
},
{
"epoch": 2.9800443458980044,
"grad_norm": 0.6761320233345032,
"learning_rate": 1.2573801346334355e-06,
"loss": 0.18841305375099182,
"step": 1344
},
{
"epoch": 2.984478935698448,
"grad_norm": 0.9729594588279724,
"learning_rate": 1.251214757521675e-06,
"loss": 0.5959298610687256,
"step": 1346
},
{
"epoch": 2.988913525498891,
"grad_norm": 0.9480107426643372,
"learning_rate": 1.2450695447462214e-06,
"loss": 0.506874144077301,
"step": 1348
},
{
"epoch": 2.9933481152993346,
"grad_norm": 6.189738750457764,
"learning_rate": 1.2389445789834534e-06,
"loss": 0.5849905610084534,
"step": 1350
},
{
"epoch": 2.9977827050997785,
"grad_norm": 0.7872065305709839,
"learning_rate": 1.2328399426373511e-06,
"loss": 0.7041124701499939,
"step": 1352
},
{
"epoch": 3.002217294900222,
"grad_norm": 0.7769404649734497,
"learning_rate": 1.2267557178383886e-06,
"loss": 0.8147103786468506,
"step": 1354
},
{
"epoch": 3.0066518847006654,
"grad_norm": 0.6591900587081909,
"learning_rate": 1.220691986442424e-06,
"loss": 0.41049686074256897,
"step": 1356
},
{
"epoch": 3.011086474501109,
"grad_norm": 1.4137377738952637,
"learning_rate": 1.2146488300296047e-06,
"loss": 0.5945199131965637,
"step": 1358
},
{
"epoch": 3.015521064301552,
"grad_norm": 2.8482906818389893,
"learning_rate": 1.2086263299032652e-06,
"loss": 0.7281069159507751,
"step": 1360
},
{
"epoch": 3.0199556541019956,
"grad_norm": 0.9070174098014832,
"learning_rate": 1.2026245670888343e-06,
"loss": 0.6940351724624634,
"step": 1362
},
{
"epoch": 3.024390243902439,
"grad_norm": 0.9107074737548828,
"learning_rate": 1.196643622332747e-06,
"loss": 0.8520534634590149,
"step": 1364
},
{
"epoch": 3.0288248337028825,
"grad_norm": 0.2674312889575958,
"learning_rate": 1.1906835761013547e-06,
"loss": 0.34424373507499695,
"step": 1366
},
{
"epoch": 3.033259423503326,
"grad_norm": 0.9089600443840027,
"learning_rate": 1.184744508579846e-06,
"loss": 0.5831412076950073,
"step": 1368
},
{
"epoch": 3.0376940133037693,
"grad_norm": 1.4997162818908691,
"learning_rate": 1.178826499671167e-06,
"loss": 0.5012683272361755,
"step": 1370
},
{
"epoch": 3.0421286031042127,
"grad_norm": 1.6579214334487915,
"learning_rate": 1.172929628994943e-06,
"loss": 0.7454507350921631,
"step": 1372
},
{
"epoch": 3.046563192904656,
"grad_norm": 1.7701013088226318,
"learning_rate": 1.167053975886413e-06,
"loss": 0.2646133005619049,
"step": 1374
},
{
"epoch": 3.0509977827050996,
"grad_norm": 2.8594934940338135,
"learning_rate": 1.1611996193953569e-06,
"loss": 0.42731595039367676,
"step": 1376
},
{
"epoch": 3.0554323725055434,
"grad_norm": 0.9399404525756836,
"learning_rate": 1.1553666382850366e-06,
"loss": 0.2942492365837097,
"step": 1378
},
{
"epoch": 3.059866962305987,
"grad_norm": 0.2747500240802765,
"learning_rate": 1.1495551110311324e-06,
"loss": 0.3034818172454834,
"step": 1380
},
{
"epoch": 3.0643015521064303,
"grad_norm": 1.7954272031784058,
"learning_rate": 1.1437651158206904e-06,
"loss": 0.23530253767967224,
"step": 1382
},
{
"epoch": 3.0687361419068737,
"grad_norm": 1.6788283586502075,
"learning_rate": 1.137996730551069e-06,
"loss": 0.5282564163208008,
"step": 1384
},
{
"epoch": 3.073170731707317,
"grad_norm": 0.3840465843677521,
"learning_rate": 1.1322500328288897e-06,
"loss": 0.2809971570968628,
"step": 1386
},
{
"epoch": 3.0776053215077606,
"grad_norm": 0.16582423448562622,
"learning_rate": 1.1265250999689966e-06,
"loss": 0.28406068682670593,
"step": 1388
},
{
"epoch": 3.082039911308204,
"grad_norm": 1.3691731691360474,
"learning_rate": 1.1208220089934118e-06,
"loss": 0.5798223614692688,
"step": 1390
},
{
"epoch": 3.0864745011086474,
"grad_norm": 8.64896297454834,
"learning_rate": 1.1151408366303024e-06,
"loss": 0.2631751298904419,
"step": 1392
},
{
"epoch": 3.090909090909091,
"grad_norm": 3.6928718090057373,
"learning_rate": 1.1094816593129475e-06,
"loss": 0.40734031796455383,
"step": 1394
},
{
"epoch": 3.0953436807095343,
"grad_norm": 1.541538119316101,
"learning_rate": 1.1038445531787083e-06,
"loss": 0.6400181651115417,
"step": 1396
},
{
"epoch": 3.0997782705099777,
"grad_norm": 1.3748445510864258,
"learning_rate": 1.098229594068007e-06,
"loss": 0.7110243439674377,
"step": 1398
},
{
"epoch": 3.104212860310421,
"grad_norm": 0.5643441081047058,
"learning_rate": 1.0926368575233032e-06,
"loss": 0.5485574007034302,
"step": 1400
},
{
"epoch": 3.1086474501108645,
"grad_norm": 0.3888166546821594,
"learning_rate": 1.087066418788078e-06,
"loss": 0.215177983045578,
"step": 1402
},
{
"epoch": 3.1130820399113084,
"grad_norm": 1.9723485708236694,
"learning_rate": 1.0815183528058248e-06,
"loss": 0.4210178256034851,
"step": 1404
},
{
"epoch": 3.117516629711752,
"grad_norm": 0.8655666708946228,
"learning_rate": 1.0759927342190362e-06,
"loss": 0.6706216335296631,
"step": 1406
},
{
"epoch": 3.1219512195121952,
"grad_norm": 0.9195745587348938,
"learning_rate": 1.0704896373682052e-06,
"loss": 0.7625486850738525,
"step": 1408
},
{
"epoch": 3.1263858093126387,
"grad_norm": 0.7832412123680115,
"learning_rate": 1.0650091362908189e-06,
"loss": 0.6624658107757568,
"step": 1410
},
{
"epoch": 3.130820399113082,
"grad_norm": 1.7871705293655396,
"learning_rate": 1.0595513047203693e-06,
"loss": 0.7058153748512268,
"step": 1412
},
{
"epoch": 3.1352549889135255,
"grad_norm": 1.7586992979049683,
"learning_rate": 1.0541162160853538e-06,
"loss": 0.47856301069259644,
"step": 1414
},
{
"epoch": 3.139689578713969,
"grad_norm": 0.526236891746521,
"learning_rate": 1.0487039435082941e-06,
"loss": 0.062277086079120636,
"step": 1416
},
{
"epoch": 3.1441241685144123,
"grad_norm": 1.4470797777175903,
"learning_rate": 1.0433145598047495e-06,
"loss": 0.7115893363952637,
"step": 1418
},
{
"epoch": 3.1485587583148558,
"grad_norm": 0.995538592338562,
"learning_rate": 1.0379481374823358e-06,
"loss": 0.6006544232368469,
"step": 1420
},
{
"epoch": 3.152993348115299,
"grad_norm": 1.696830153465271,
"learning_rate": 1.032604748739751e-06,
"loss": 0.6471589207649231,
"step": 1422
},
{
"epoch": 3.1574279379157426,
"grad_norm": 0.5295472145080566,
"learning_rate": 1.0272844654658069e-06,
"loss": 0.0932040736079216,
"step": 1424
},
{
"epoch": 3.1618625277161865,
"grad_norm": 2.297607660293579,
"learning_rate": 1.0219873592384556e-06,
"loss": 0.5828940272331238,
"step": 1426
},
{
"epoch": 3.16629711751663,
"grad_norm": 1.9336421489715576,
"learning_rate": 1.016713501323834e-06,
"loss": 0.7437405586242676,
"step": 1428
},
{
"epoch": 3.1707317073170733,
"grad_norm": 1.690544605255127,
"learning_rate": 1.0114629626752973e-06,
"loss": 0.3497503399848938,
"step": 1430
},
{
"epoch": 3.1751662971175167,
"grad_norm": 16.42621421813965,
"learning_rate": 1.0062358139324715e-06,
"loss": 0.40635061264038086,
"step": 1432
},
{
"epoch": 3.17960088691796,
"grad_norm": 1.3304423093795776,
"learning_rate": 1.0010321254202992e-06,
"loss": 0.5058741569519043,
"step": 1434
},
{
"epoch": 3.1840354767184036,
"grad_norm": 2.4488651752471924,
"learning_rate": 9.958519671480919e-07,
"loss": 0.20857569575309753,
"step": 1436
},
{
"epoch": 3.188470066518847,
"grad_norm": 0.8199713826179504,
"learning_rate": 9.906954088085929e-07,
"loss": 0.4270777106285095,
"step": 1438
},
{
"epoch": 3.1929046563192904,
"grad_norm": 3.093682289123535,
"learning_rate": 9.85562519777035e-07,
"loss": 0.5151862502098083,
"step": 1440
},
{
"epoch": 3.197339246119734,
"grad_norm": 1.1956168413162231,
"learning_rate": 9.804533691102112e-07,
"loss": 0.658134400844574,
"step": 1442
},
{
"epoch": 3.2017738359201773,
"grad_norm": 0.7333883047103882,
"learning_rate": 9.75368025545542e-07,
"loss": 0.7828593254089355,
"step": 1444
},
{
"epoch": 3.2062084257206207,
"grad_norm": 0.716337263584137,
"learning_rate": 9.703065575001518e-07,
"loss": 0.6528302431106567,
"step": 1446
},
{
"epoch": 3.210643015521064,
"grad_norm": 2.811972141265869,
"learning_rate": 9.65269033069952e-07,
"loss": 0.4046706557273865,
"step": 1448
},
{
"epoch": 3.2150776053215075,
"grad_norm": 0.9280921816825867,
"learning_rate": 9.602555200287184e-07,
"loss": 0.7239767909049988,
"step": 1450
},
{
"epoch": 3.2195121951219514,
"grad_norm": 2.487478256225586,
"learning_rate": 9.552660858271835e-07,
"loss": 0.5565465688705444,
"step": 1452
},
{
"epoch": 3.223946784922395,
"grad_norm": 0.8375023603439331,
"learning_rate": 9.503007975921294e-07,
"loss": 0.49734872579574585,
"step": 1454
},
{
"epoch": 3.2283813747228383,
"grad_norm": 6.227933883666992,
"learning_rate": 9.453597221254821e-07,
"loss": 0.7493337392807007,
"step": 1456
},
{
"epoch": 3.2328159645232817,
"grad_norm": 2.4948275089263916,
"learning_rate": 9.404429259034156e-07,
"loss": 0.41658228635787964,
"step": 1458
},
{
"epoch": 3.237250554323725,
"grad_norm": 0.9919694066047668,
"learning_rate": 9.355504750754543e-07,
"loss": 0.7514620423316956,
"step": 1460
},
{
"epoch": 3.2416851441241685,
"grad_norm": 0.985115647315979,
"learning_rate": 9.306824354635866e-07,
"loss": 0.46349745988845825,
"step": 1462
},
{
"epoch": 3.246119733924612,
"grad_norm": 3.361206531524658,
"learning_rate": 9.258388725613776e-07,
"loss": 0.6378527879714966,
"step": 1464
},
{
"epoch": 3.2505543237250554,
"grad_norm": 1.1187552213668823,
"learning_rate": 9.21019851533086e-07,
"loss": 0.7121803164482117,
"step": 1466
},
{
"epoch": 3.254988913525499,
"grad_norm": 0.5266161561012268,
"learning_rate": 9.162254372127921e-07,
"loss": 0.09279008209705353,
"step": 1468
},
{
"epoch": 3.259423503325942,
"grad_norm": 2.9691877365112305,
"learning_rate": 9.114556941035199e-07,
"loss": 0.8856503367424011,
"step": 1470
},
{
"epoch": 3.2638580931263856,
"grad_norm": 1.2273247241973877,
"learning_rate": 9.067106863763752e-07,
"loss": 0.7877327799797058,
"step": 1472
},
{
"epoch": 3.2682926829268295,
"grad_norm": 1.129638671875,
"learning_rate": 9.01990477869677e-07,
"loss": 0.297911673784256,
"step": 1474
},
{
"epoch": 3.2727272727272725,
"grad_norm": 0.7427173852920532,
"learning_rate": 8.972951320881014e-07,
"loss": 0.5932102203369141,
"step": 1476
},
{
"epoch": 3.2771618625277164,
"grad_norm": 2.747760772705078,
"learning_rate": 8.92624712201827e-07,
"loss": 0.38971173763275146,
"step": 1478
},
{
"epoch": 3.2815964523281598,
"grad_norm": 0.9102936387062073,
"learning_rate": 8.879792810456861e-07,
"loss": 0.46492668986320496,
"step": 1480
},
{
"epoch": 3.286031042128603,
"grad_norm": 0.7271443009376526,
"learning_rate": 8.833589011183147e-07,
"loss": 0.6474693417549133,
"step": 1482
},
{
"epoch": 3.2904656319290466,
"grad_norm": 0.862979531288147,
"learning_rate": 8.78763634581318e-07,
"loss": 0.517356276512146,
"step": 1484
},
{
"epoch": 3.29490022172949,
"grad_norm": 0.7722979784011841,
"learning_rate": 8.741935432584292e-07,
"loss": 0.3972328305244446,
"step": 1486
},
{
"epoch": 3.2993348115299335,
"grad_norm": 2.848907232284546,
"learning_rate": 8.696486886346805e-07,
"loss": 0.4415854513645172,
"step": 1488
},
{
"epoch": 3.303769401330377,
"grad_norm": 7.881778240203857,
"learning_rate": 8.651291318555745e-07,
"loss": 0.3040315508842468,
"step": 1490
},
{
"epoch": 3.3082039911308203,
"grad_norm": 0.8357083201408386,
"learning_rate": 8.606349337262623e-07,
"loss": 0.9919511675834656,
"step": 1492
},
{
"epoch": 3.3126385809312637,
"grad_norm": 1.3582104444503784,
"learning_rate": 8.561661547107243e-07,
"loss": 0.5778933763504028,
"step": 1494
},
{
"epoch": 3.317073170731707,
"grad_norm": 3.0428314208984375,
"learning_rate": 8.517228549309588e-07,
"loss": 0.5790684223175049,
"step": 1496
},
{
"epoch": 3.3215077605321506,
"grad_norm": 1.9520448446273804,
"learning_rate": 8.473050941661717e-07,
"loss": 0.7469807267189026,
"step": 1498
},
{
"epoch": 3.3259423503325944,
"grad_norm": 0.7510539293289185,
"learning_rate": 8.429129318519711e-07,
"loss": 0.4080921411514282,
"step": 1500
},
{
"epoch": 3.330376940133038,
"grad_norm": 0.9013833999633789,
"learning_rate": 8.38546427079571e-07,
"loss": 0.6411795616149902,
"step": 1502
},
{
"epoch": 3.3348115299334813,
"grad_norm": 1.3937565088272095,
"learning_rate": 8.342056385949929e-07,
"loss": 0.7303662300109863,
"step": 1504
},
{
"epoch": 3.3392461197339247,
"grad_norm": 0.22746898233890533,
"learning_rate": 8.298906247982768e-07,
"loss": 0.4033330976963043,
"step": 1506
},
{
"epoch": 3.343680709534368,
"grad_norm": 0.9032326936721802,
"learning_rate": 8.25601443742697e-07,
"loss": 0.36406025290489197,
"step": 1508
},
{
"epoch": 3.3481152993348116,
"grad_norm": 2.841404914855957,
"learning_rate": 8.213381531339776e-07,
"loss": 0.8044983148574829,
"step": 1510
},
{
"epoch": 3.352549889135255,
"grad_norm": 0.41309988498687744,
"learning_rate": 8.1710081032952e-07,
"loss": 0.4085044860839844,
"step": 1512
},
{
"epoch": 3.3569844789356984,
"grad_norm": 1.5829377174377441,
"learning_rate": 8.128894723376285e-07,
"loss": 0.7842904925346375,
"step": 1514
},
{
"epoch": 3.361419068736142,
"grad_norm": 1.9180912971496582,
"learning_rate": 8.087041958167438e-07,
"loss": 0.46436750888824463,
"step": 1516
},
{
"epoch": 3.3658536585365852,
"grad_norm": 0.8330836296081543,
"learning_rate": 8.04545037074683e-07,
"loss": 0.44874808192253113,
"step": 1518
},
{
"epoch": 3.3702882483370287,
"grad_norm": 1.4987759590148926,
"learning_rate": 8.004120520678768e-07,
"loss": 0.6579011678695679,
"step": 1520
},
{
"epoch": 3.374722838137472,
"grad_norm": 0.4084889590740204,
"learning_rate": 7.963052964006243e-07,
"loss": 0.4690130650997162,
"step": 1522
},
{
"epoch": 3.3791574279379155,
"grad_norm": 2.0353617668151855,
"learning_rate": 7.922248253243367e-07,
"loss": 0.738138735294342,
"step": 1524
},
{
"epoch": 3.3835920177383594,
"grad_norm": 2.91652250289917,
"learning_rate": 7.881706937368005e-07,
"loss": 0.677200973033905,
"step": 1526
},
{
"epoch": 3.388026607538803,
"grad_norm": 0.9351639151573181,
"learning_rate": 7.84142956181436e-07,
"loss": 0.4146648943424225,
"step": 1528
},
{
"epoch": 3.3924611973392462,
"grad_norm": 0.9312740564346313,
"learning_rate": 7.801416668465621e-07,
"loss": 0.3662897050380707,
"step": 1530
},
{
"epoch": 3.3968957871396896,
"grad_norm": 3.5264506340026855,
"learning_rate": 7.76166879564672e-07,
"loss": 0.22805796563625336,
"step": 1532
},
{
"epoch": 3.401330376940133,
"grad_norm": 0.9523420929908752,
"learning_rate": 7.722186478117031e-07,
"loss": 0.5999071002006531,
"step": 1534
},
{
"epoch": 3.4057649667405765,
"grad_norm": 2.5635905265808105,
"learning_rate": 7.682970247063212e-07,
"loss": 0.7036619186401367,
"step": 1536
},
{
"epoch": 3.41019955654102,
"grad_norm": 2.3564810752868652,
"learning_rate": 7.644020630092066e-07,
"loss": 0.5891589522361755,
"step": 1538
},
{
"epoch": 3.4146341463414633,
"grad_norm": 0.8324587345123291,
"learning_rate": 7.605338151223401e-07,
"loss": 0.667372465133667,
"step": 1540
},
{
"epoch": 3.4190687361419068,
"grad_norm": 1.3478339910507202,
"learning_rate": 7.566923330883029e-07,
"loss": 0.5353263020515442,
"step": 1542
},
{
"epoch": 3.42350332594235,
"grad_norm": 1.5044814348220825,
"learning_rate": 7.528776685895731e-07,
"loss": 0.6542061567306519,
"step": 1544
},
{
"epoch": 3.4279379157427936,
"grad_norm": 2.0639114379882812,
"learning_rate": 7.490898729478312e-07,
"loss": 0.16641607880592346,
"step": 1546
},
{
"epoch": 3.4323725055432375,
"grad_norm": 0.10060684382915497,
"learning_rate": 7.45328997123271e-07,
"loss": 0.18048889935016632,
"step": 1548
},
{
"epoch": 3.436807095343681,
"grad_norm": 1.618093490600586,
"learning_rate": 7.415950917139106e-07,
"loss": 0.8383937478065491,
"step": 1550
},
{
"epoch": 3.4412416851441243,
"grad_norm": 1.1742033958435059,
"learning_rate": 7.378882069549166e-07,
"loss": 0.6342257857322693,
"step": 1552
},
{
"epoch": 3.4456762749445677,
"grad_norm": 0.2840815782546997,
"learning_rate": 7.342083927179235e-07,
"loss": 0.35999611020088196,
"step": 1554
},
{
"epoch": 3.450110864745011,
"grad_norm": 1.5036356449127197,
"learning_rate": 7.30555698510366e-07,
"loss": 0.7260861992835999,
"step": 1556
},
{
"epoch": 3.4545454545454546,
"grad_norm": 1.3467886447906494,
"learning_rate": 7.269301734748107e-07,
"loss": 0.7150126099586487,
"step": 1558
},
{
"epoch": 3.458980044345898,
"grad_norm": 0.8913111090660095,
"learning_rate": 7.233318663882968e-07,
"loss": 0.7145206332206726,
"step": 1560
},
{
"epoch": 3.4634146341463414,
"grad_norm": 1.6978760957717896,
"learning_rate": 7.197608256616792e-07,
"loss": 0.1996350884437561,
"step": 1562
},
{
"epoch": 3.467849223946785,
"grad_norm": 0.8095346093177795,
"learning_rate": 7.162170993389763e-07,
"loss": 0.7250151038169861,
"step": 1564
},
{
"epoch": 3.4722838137472283,
"grad_norm": 1.1986325979232788,
"learning_rate": 7.127007350967241e-07,
"loss": 0.40820562839508057,
"step": 1566
},
{
"epoch": 3.4767184035476717,
"grad_norm": 1.269044041633606,
"learning_rate": 7.092117802433362e-07,
"loss": 0.6645058393478394,
"step": 1568
},
{
"epoch": 3.481152993348115,
"grad_norm": 1.3065571784973145,
"learning_rate": 7.057502817184648e-07,
"loss": 0.4576531946659088,
"step": 1570
},
{
"epoch": 3.4855875831485585,
"grad_norm": 0.37347811460494995,
"learning_rate": 7.023162860923722e-07,
"loss": 0.4664113521575928,
"step": 1572
},
{
"epoch": 3.4900221729490024,
"grad_norm": 1.5471532344818115,
"learning_rate": 6.989098395653005e-07,
"loss": 0.911579430103302,
"step": 1574
},
{
"epoch": 3.494456762749446,
"grad_norm": 3.40287709236145,
"learning_rate": 6.955309879668537e-07,
"loss": 0.36696523427963257,
"step": 1576
},
{
"epoch": 3.4988913525498893,
"grad_norm": 1.022657871246338,
"learning_rate": 6.921797767553794e-07,
"loss": 0.42062070965766907,
"step": 1578
},
{
"epoch": 3.5033259423503327,
"grad_norm": 1.3818333148956299,
"learning_rate": 6.88856251017356e-07,
"loss": 0.7136982083320618,
"step": 1580
},
{
"epoch": 3.507760532150776,
"grad_norm": 0.9989206194877625,
"learning_rate": 6.855604554667897e-07,
"loss": 0.8498230576515198,
"step": 1582
},
{
"epoch": 3.5121951219512195,
"grad_norm": 1.0440279245376587,
"learning_rate": 6.822924344446081e-07,
"loss": 0.44726136326789856,
"step": 1584
},
{
"epoch": 3.516629711751663,
"grad_norm": 1.522624135017395,
"learning_rate": 6.790522319180687e-07,
"loss": 0.4782644510269165,
"step": 1586
},
{
"epoch": 3.5210643015521064,
"grad_norm": 0.845657467842102,
"learning_rate": 6.758398914801628e-07,
"loss": 0.7009880542755127,
"step": 1588
},
{
"epoch": 3.52549889135255,
"grad_norm": 2.187683582305908,
"learning_rate": 6.726554563490321e-07,
"loss": 0.39783769845962524,
"step": 1590
},
{
"epoch": 3.529933481152993,
"grad_norm": 4.550779819488525,
"learning_rate": 6.694989693673872e-07,
"loss": 0.4127946197986603,
"step": 1592
},
{
"epoch": 3.5343680709534366,
"grad_norm": 1.568524956703186,
"learning_rate": 6.663704730019285e-07,
"loss": 0.8160148859024048,
"step": 1594
},
{
"epoch": 3.5388026607538805,
"grad_norm": 2.7332797050476074,
"learning_rate": 6.632700093427774e-07,
"loss": 0.1282457411289215,
"step": 1596
},
{
"epoch": 3.5432372505543235,
"grad_norm": 1.79358971118927,
"learning_rate": 6.601976201029095e-07,
"loss": 0.3730695843696594,
"step": 1598
},
{
"epoch": 3.5476718403547673,
"grad_norm": 0.8376679420471191,
"learning_rate": 6.571533466175928e-07,
"loss": 0.4683838486671448,
"step": 1600
},
{
"epoch": 3.5521064301552108,
"grad_norm": 1.0493067502975464,
"learning_rate": 6.541372298438325e-07,
"loss": 0.6612682938575745,
"step": 1602
},
{
"epoch": 3.556541019955654,
"grad_norm": 1.6749497652053833,
"learning_rate": 6.511493103598184e-07,
"loss": 0.8235822319984436,
"step": 1604
},
{
"epoch": 3.5609756097560976,
"grad_norm": 0.9716708660125732,
"learning_rate": 6.481896283643808e-07,
"loss": 0.7026538252830505,
"step": 1606
},
{
"epoch": 3.565410199556541,
"grad_norm": 0.8682565093040466,
"learning_rate": 6.452582236764495e-07,
"loss": 0.06921753287315369,
"step": 1608
},
{
"epoch": 3.5698447893569845,
"grad_norm": 4.531189918518066,
"learning_rate": 6.423551357345154e-07,
"loss": 0.461900919675827,
"step": 1610
},
{
"epoch": 3.574279379157428,
"grad_norm": 2.879300832748413,
"learning_rate": 6.394804035961038e-07,
"loss": 0.14274033904075623,
"step": 1612
},
{
"epoch": 3.5787139689578713,
"grad_norm": 1.3849800825119019,
"learning_rate": 6.366340659372462e-07,
"loss": 0.4771314263343811,
"step": 1614
},
{
"epoch": 3.5831485587583147,
"grad_norm": 1.026705265045166,
"learning_rate": 6.338161610519618e-07,
"loss": 0.7851333022117615,
"step": 1616
},
{
"epoch": 3.587583148558758,
"grad_norm": 2.080756902694702,
"learning_rate": 6.310267268517397e-07,
"loss": 0.4780849516391754,
"step": 1618
},
{
"epoch": 3.5920177383592016,
"grad_norm": 3.4350011348724365,
"learning_rate": 6.282658008650318e-07,
"loss": 0.4769437909126282,
"step": 1620
},
{
"epoch": 3.5964523281596454,
"grad_norm": 1.204390287399292,
"learning_rate": 6.255334202367462e-07,
"loss": 0.6624370813369751,
"step": 1622
},
{
"epoch": 3.6008869179600884,
"grad_norm": 1.616827368736267,
"learning_rate": 6.228296217277481e-07,
"loss": 0.5747263431549072,
"step": 1624
},
{
"epoch": 3.6053215077605323,
"grad_norm": 0.4036061465740204,
"learning_rate": 6.201544417143641e-07,
"loss": 0.1593714952468872,
"step": 1626
},
{
"epoch": 3.6097560975609757,
"grad_norm": 1.0553009510040283,
"learning_rate": 6.175079161878951e-07,
"loss": 0.7550400495529175,
"step": 1628
},
{
"epoch": 3.614190687361419,
"grad_norm": 5.636652946472168,
"learning_rate": 6.148900807541295e-07,
"loss": 0.5534995794296265,
"step": 1630
},
{
"epoch": 3.6186252771618626,
"grad_norm": 1.9536336660385132,
"learning_rate": 6.123009706328659e-07,
"loss": 0.5692697167396545,
"step": 1632
},
{
"epoch": 3.623059866962306,
"grad_norm": 1.257670283317566,
"learning_rate": 6.097406206574378e-07,
"loss": 0.6251094937324524,
"step": 1634
},
{
"epoch": 3.6274944567627494,
"grad_norm": 0.8495803475379944,
"learning_rate": 6.072090652742475e-07,
"loss": 0.42323750257492065,
"step": 1636
},
{
"epoch": 3.631929046563193,
"grad_norm": 4.202381610870361,
"learning_rate": 6.047063385422993e-07,
"loss": 0.46832165122032166,
"step": 1638
},
{
"epoch": 3.6363636363636362,
"grad_norm": 1.8445926904678345,
"learning_rate": 6.022324741327438e-07,
"loss": 0.657563328742981,
"step": 1640
},
{
"epoch": 3.6407982261640797,
"grad_norm": 3.063267230987549,
"learning_rate": 5.997875053284248e-07,
"loss": 0.3927001655101776,
"step": 1642
},
{
"epoch": 3.6452328159645235,
"grad_norm": 1.844231367111206,
"learning_rate": 5.973714650234287e-07,
"loss": 0.6536309123039246,
"step": 1644
},
{
"epoch": 3.6496674057649665,
"grad_norm": 0.11124907433986664,
"learning_rate": 5.949843857226466e-07,
"loss": 0.2834221124649048,
"step": 1646
},
{
"epoch": 3.6541019955654104,
"grad_norm": 0.8278182744979858,
"learning_rate": 5.926262995413329e-07,
"loss": 0.2620082497596741,
"step": 1648
},
{
"epoch": 3.658536585365854,
"grad_norm": 1.227217197418213,
"learning_rate": 5.902972382046742e-07,
"loss": 0.44254299998283386,
"step": 1650
},
{
"epoch": 3.662971175166297,
"grad_norm": 0.9186346530914307,
"learning_rate": 5.879972330473651e-07,
"loss": 0.6525332927703857,
"step": 1652
},
{
"epoch": 3.6674057649667406,
"grad_norm": 0.9581958651542664,
"learning_rate": 5.857263150131825e-07,
"loss": 0.36457884311676025,
"step": 1654
},
{
"epoch": 3.671840354767184,
"grad_norm": 1.0149691104888916,
"learning_rate": 5.834845146545726e-07,
"loss": 0.8156982660293579,
"step": 1656
},
{
"epoch": 3.6762749445676275,
"grad_norm": 2.3508944511413574,
"learning_rate": 5.812718621322386e-07,
"loss": 0.4678148627281189,
"step": 1658
},
{
"epoch": 3.680709534368071,
"grad_norm": 2.148775100708008,
"learning_rate": 5.790883872147341e-07,
"loss": 0.404410719871521,
"step": 1660
},
{
"epoch": 3.6851441241685143,
"grad_norm": 0.22560228407382965,
"learning_rate": 5.769341192780643e-07,
"loss": 0.3717038035392761,
"step": 1662
},
{
"epoch": 3.6895787139689578,
"grad_norm": 0.8757584691047668,
"learning_rate": 5.748090873052892e-07,
"loss": 0.32363370060920715,
"step": 1664
},
{
"epoch": 3.694013303769401,
"grad_norm": 1.1470128297805786,
"learning_rate": 5.727133198861353e-07,
"loss": 0.6273210048675537,
"step": 1666
},
{
"epoch": 3.6984478935698446,
"grad_norm": 1.020920991897583,
"learning_rate": 5.706468452166091e-07,
"loss": 0.5913212299346924,
"step": 1668
},
{
"epoch": 3.7028824833702885,
"grad_norm": 1.009974718093872,
"learning_rate": 5.686096910986189e-07,
"loss": 0.650887131690979,
"step": 1670
},
{
"epoch": 3.7073170731707314,
"grad_norm": 1.03592050075531,
"learning_rate": 5.666018849396016e-07,
"loss": 0.6401125192642212,
"step": 1672
},
{
"epoch": 3.7117516629711753,
"grad_norm": 0.7517452239990234,
"learning_rate": 5.646234537521513e-07,
"loss": 0.6306463479995728,
"step": 1674
},
{
"epoch": 3.7161862527716187,
"grad_norm": 2.245835542678833,
"learning_rate": 5.626744241536589e-07,
"loss": 0.71409010887146,
"step": 1676
},
{
"epoch": 3.720620842572062,
"grad_norm": 1.8823548555374146,
"learning_rate": 5.607548223659519e-07,
"loss": 0.6528919339179993,
"step": 1678
},
{
"epoch": 3.7250554323725056,
"grad_norm": 2.2317075729370117,
"learning_rate": 5.58864674214942e-07,
"loss": 0.30886802077293396,
"step": 1680
},
{
"epoch": 3.729490022172949,
"grad_norm": 0.9859198331832886,
"learning_rate": 5.57004005130279e-07,
"loss": 0.5217846035957336,
"step": 1682
},
{
"epoch": 3.7339246119733924,
"grad_norm": 0.8078064918518066,
"learning_rate": 5.551728401450067e-07,
"loss": 0.3513007164001465,
"step": 1684
},
{
"epoch": 3.738359201773836,
"grad_norm": 2.4825408458709717,
"learning_rate": 5.533712038952278e-07,
"loss": 0.5929620265960693,
"step": 1686
},
{
"epoch": 3.7427937915742793,
"grad_norm": 0.38356509804725647,
"learning_rate": 5.51599120619771e-07,
"loss": 0.25690025091171265,
"step": 1688
},
{
"epoch": 3.7472283813747227,
"grad_norm": 1.4826903343200684,
"learning_rate": 5.498566141598662e-07,
"loss": 0.7508558034896851,
"step": 1690
},
{
"epoch": 3.7516629711751666,
"grad_norm": 1.05094313621521,
"learning_rate": 5.481437079588227e-07,
"loss": 0.5309889912605286,
"step": 1692
},
{
"epoch": 3.7560975609756095,
"grad_norm": 1.0361418724060059,
"learning_rate": 5.464604250617143e-07,
"loss": 0.7995379567146301,
"step": 1694
},
{
"epoch": 3.7605321507760534,
"grad_norm": 1.1136112213134766,
"learning_rate": 5.448067881150697e-07,
"loss": 0.7239155769348145,
"step": 1696
},
{
"epoch": 3.764966740576497,
"grad_norm": 0.940222978591919,
"learning_rate": 5.431828193665664e-07,
"loss": 0.3560809791088104,
"step": 1698
},
{
"epoch": 3.7694013303769403,
"grad_norm": 2.683884620666504,
"learning_rate": 5.415885406647334e-07,
"loss": 0.44195085763931274,
"step": 1700
},
{
"epoch": 3.7738359201773837,
"grad_norm": 2.6246378421783447,
"learning_rate": 5.400239734586551e-07,
"loss": 0.980012059211731,
"step": 1702
},
{
"epoch": 3.778270509977827,
"grad_norm": 0.5143499970436096,
"learning_rate": 5.384891387976845e-07,
"loss": 0.14009836316108704,
"step": 1704
},
{
"epoch": 3.7827050997782705,
"grad_norm": 0.4758577048778534,
"learning_rate": 5.369840573311593e-07,
"loss": 0.40948137640953064,
"step": 1706
},
{
"epoch": 3.787139689578714,
"grad_norm": 1.033705234527588,
"learning_rate": 5.355087493081236e-07,
"loss": 0.4629664719104767,
"step": 1708
},
{
"epoch": 3.7915742793791574,
"grad_norm": 0.836288571357727,
"learning_rate": 5.340632345770564e-07,
"loss": 0.7450593113899231,
"step": 1710
},
{
"epoch": 3.796008869179601,
"grad_norm": 2.316535711288452,
"learning_rate": 5.326475325856036e-07,
"loss": 0.4111533463001251,
"step": 1712
},
{
"epoch": 3.800443458980044,
"grad_norm": 62.51638412475586,
"learning_rate": 5.312616623803174e-07,
"loss": 0.5232793092727661,
"step": 1714
},
{
"epoch": 3.8048780487804876,
"grad_norm": 1.5670796632766724,
"learning_rate": 5.299056426063995e-07,
"loss": 0.6640741229057312,
"step": 1716
},
{
"epoch": 3.8093126385809315,
"grad_norm": 0.8931707739830017,
"learning_rate": 5.2857949150745e-07,
"loss": 0.5484420657157898,
"step": 1718
},
{
"epoch": 3.8137472283813745,
"grad_norm": 1.2039430141448975,
"learning_rate": 5.27283226925222e-07,
"loss": 0.5476232767105103,
"step": 1720
},
{
"epoch": 3.8181818181818183,
"grad_norm": 0.9757826328277588,
"learning_rate": 5.260168662993824e-07,
"loss": 0.7852546572685242,
"step": 1722
},
{
"epoch": 3.8226164079822618,
"grad_norm": 1.4504204988479614,
"learning_rate": 5.247804266672765e-07,
"loss": 0.7212304472923279,
"step": 1724
},
{
"epoch": 3.827050997782705,
"grad_norm": 1.1943776607513428,
"learning_rate": 5.235739246636988e-07,
"loss": 0.6680858731269836,
"step": 1726
},
{
"epoch": 3.8314855875831486,
"grad_norm": 1.8303642272949219,
"learning_rate": 5.223973765206694e-07,
"loss": 0.3614678680896759,
"step": 1728
},
{
"epoch": 3.835920177383592,
"grad_norm": 3.3566129207611084,
"learning_rate": 5.212507980672155e-07,
"loss": 0.4301706552505493,
"step": 1730
},
{
"epoch": 3.8403547671840355,
"grad_norm": 0.91670823097229,
"learning_rate": 5.201342047291587e-07,
"loss": 0.7216442823410034,
"step": 1732
},
{
"epoch": 3.844789356984479,
"grad_norm": 27.859312057495117,
"learning_rate": 5.190476115289063e-07,
"loss": 0.8199055194854736,
"step": 1734
},
{
"epoch": 3.8492239467849223,
"grad_norm": 0.8166837096214294,
"learning_rate": 5.179910330852521e-07,
"loss": 0.7341957092285156,
"step": 1736
},
{
"epoch": 3.8536585365853657,
"grad_norm": 1.106396198272705,
"learning_rate": 5.169644836131759e-07,
"loss": 0.6143337488174438,
"step": 1738
},
{
"epoch": 3.858093126385809,
"grad_norm": 0.8808902502059937,
"learning_rate": 5.159679769236553e-07,
"loss": 0.37879249453544617,
"step": 1740
},
{
"epoch": 3.8625277161862526,
"grad_norm": 1.7568862438201904,
"learning_rate": 5.150015264234782e-07,
"loss": 0.45216262340545654,
"step": 1742
},
{
"epoch": 3.8669623059866964,
"grad_norm": 1.3479468822479248,
"learning_rate": 5.140651451150627e-07,
"loss": 0.6618800759315491,
"step": 1744
},
{
"epoch": 3.8713968957871394,
"grad_norm": 1.1075336933135986,
"learning_rate": 5.131588455962835e-07,
"loss": 0.653846263885498,
"step": 1746
},
{
"epoch": 3.8758314855875833,
"grad_norm": 1.473656415939331,
"learning_rate": 5.122826400602999e-07,
"loss": 0.43444833159446716,
"step": 1748
},
{
"epoch": 3.8802660753880267,
"grad_norm": 1.5921751260757446,
"learning_rate": 5.114365402953946e-07,
"loss": 0.5735731720924377,
"step": 1750
},
{
"epoch": 3.88470066518847,
"grad_norm": 3.2497620582580566,
"learning_rate": 5.106205576848123e-07,
"loss": 0.571108341217041,
"step": 1752
},
{
"epoch": 3.8891352549889135,
"grad_norm": 3.123436212539673,
"learning_rate": 5.09834703206609e-07,
"loss": 0.7177774906158447,
"step": 1754
},
{
"epoch": 3.893569844789357,
"grad_norm": 13.750722885131836,
"learning_rate": 5.090789874335027e-07,
"loss": 0.6665231585502625,
"step": 1756
},
{
"epoch": 3.8980044345898004,
"grad_norm": 1.5528661012649536,
"learning_rate": 5.083534205327321e-07,
"loss": 0.7704042792320251,
"step": 1758
},
{
"epoch": 3.902439024390244,
"grad_norm": 0.040434010326862335,
"learning_rate": 5.076580122659192e-07,
"loss": 0.0017574625089764595,
"step": 1760
},
{
"epoch": 3.9068736141906872,
"grad_norm": 2.7411434650421143,
"learning_rate": 5.069927719889383e-07,
"loss": 0.30478376150131226,
"step": 1762
},
{
"epoch": 3.9113082039911307,
"grad_norm": 0.1822403222322464,
"learning_rate": 5.063577086517894e-07,
"loss": 0.3710252046585083,
"step": 1764
},
{
"epoch": 3.9157427937915745,
"grad_norm": 2.8029940128326416,
"learning_rate": 5.057528307984792e-07,
"loss": 0.47346100211143494,
"step": 1766
},
{
"epoch": 3.9201773835920175,
"grad_norm": 1.2661375999450684,
"learning_rate": 5.051781465669053e-07,
"loss": 0.7573010921478271,
"step": 1768
},
{
"epoch": 3.9246119733924614,
"grad_norm": 0.8320476412773132,
"learning_rate": 5.04633663688746e-07,
"loss": 0.7401853203773499,
"step": 1770
},
{
"epoch": 3.929046563192905,
"grad_norm": 2.3888723850250244,
"learning_rate": 5.04119389489358e-07,
"loss": 0.2338392287492752,
"step": 1772
},
{
"epoch": 3.933481152993348,
"grad_norm": 1.4304852485656738,
"learning_rate": 5.036353308876764e-07,
"loss": 0.6549674272537231,
"step": 1774
},
{
"epoch": 3.9379157427937916,
"grad_norm": 0.8762494325637817,
"learning_rate": 5.031814943961221e-07,
"loss": 0.6887394785881042,
"step": 1776
},
{
"epoch": 3.942350332594235,
"grad_norm": 0.3672865927219391,
"learning_rate": 5.027578861205139e-07,
"loss": 0.13220834732055664,
"step": 1778
},
{
"epoch": 3.9467849223946785,
"grad_norm": 4.185744285583496,
"learning_rate": 5.023645117599877e-07,
"loss": 0.45351341366767883,
"step": 1780
},
{
"epoch": 3.951219512195122,
"grad_norm": 0.27578166127204895,
"learning_rate": 5.020013766069176e-07,
"loss": 0.3945060670375824,
"step": 1782
},
{
"epoch": 3.9556541019955653,
"grad_norm": 0.877690315246582,
"learning_rate": 5.016684855468464e-07,
"loss": 0.44928890466690063,
"step": 1784
},
{
"epoch": 3.9600886917960088,
"grad_norm": 0.863924503326416,
"learning_rate": 5.013658430584194e-07,
"loss": 0.6446860432624817,
"step": 1786
},
{
"epoch": 3.964523281596452,
"grad_norm": 0.9856058359146118,
"learning_rate": 5.010934532133236e-07,
"loss": 0.4857484996318817,
"step": 1788
},
{
"epoch": 3.9689578713968956,
"grad_norm": 2.3645358085632324,
"learning_rate": 5.008513196762342e-07,
"loss": 0.5328899621963501,
"step": 1790
},
{
"epoch": 3.9733924611973395,
"grad_norm": 1.1871687173843384,
"learning_rate": 5.006394457047638e-07,
"loss": 0.6613432168960571,
"step": 1792
},
{
"epoch": 3.9778270509977824,
"grad_norm": 2.269967794418335,
"learning_rate": 5.004578341494197e-07,
"loss": 0.40819886326789856,
"step": 1794
},
{
"epoch": 3.9822616407982263,
"grad_norm": 0.9505739808082581,
"learning_rate": 5.003064874535649e-07,
"loss": 0.5899269580841064,
"step": 1796
},
{
"epoch": 3.9866962305986697,
"grad_norm": 1.2086498737335205,
"learning_rate": 5.00185407653385e-07,
"loss": 0.6620221138000488,
"step": 1798
},
{
"epoch": 3.991130820399113,
"grad_norm": 2.2357959747314453,
"learning_rate": 5.000945963778627e-07,
"loss": 0.6272152662277222,
"step": 1800
},
{
"epoch": 3.9955654101995566,
"grad_norm": 1.1447160243988037,
"learning_rate": 5.000340548487528e-07,
"loss": 0.7161806225776672,
"step": 1802
},
{
"epoch": 4.0,
"grad_norm": 1.5152302980422974,
"learning_rate": 5.000037838805682e-07,
"loss": 0.3338135778903961,
"step": 1804
},
{
"epoch": 4.0,
"step": 1804,
"total_flos": 3.4175049861232067e+18,
"train_loss": 0.8372825072394754,
"train_runtime": 8301.3852,
"train_samples_per_second": 6.519,
"train_steps_per_second": 0.217
}
],
"logging_steps": 2,
"max_steps": 1804,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.4175049861232067e+18,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}